From 96c7dd87811a42affda8f20d997bbf736cde9fec Mon Sep 17 00:00:00 2001 From: kejingfan Date: Sun, 21 Sep 2025 03:04:17 +0000 Subject: [PATCH] Remove .ipynb_checkpoints from version control --- .../Pytorch基本操作实验报告-checkpoint.ipynb | 1609 ------- .../前馈神经网络实验-checkpoint.ipynb | 3881 ----------------- .../卷积神经网络实验-checkpoint.ipynb | 1692 ------- .../循环神经网络实验-checkpoint.ipynb | 1350 ------ 4 files changed, 8532 deletions(-) delete mode 100644 Lab1/.ipynb_checkpoints/Pytorch基本操作实验报告-checkpoint.ipynb delete mode 100644 Lab2/.ipynb_checkpoints/前馈神经网络实验-checkpoint.ipynb delete mode 100644 Lab3/.ipynb_checkpoints/卷积神经网络实验-checkpoint.ipynb delete mode 100644 Lab4/.ipynb_checkpoints/循环神经网络实验-checkpoint.ipynb diff --git a/Lab1/.ipynb_checkpoints/Pytorch基本操作实验报告-checkpoint.ipynb b/Lab1/.ipynb_checkpoints/Pytorch基本操作实验报告-checkpoint.ipynb deleted file mode 100644 index a9fe639..0000000 --- a/Lab1/.ipynb_checkpoints/Pytorch基本操作实验报告-checkpoint.ipynb +++ /dev/null @@ -1,1609 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a07da69b-4328-420a-81f7-8ea7a78748e6", - "metadata": {}, - "source": [ - "

研究生《深度学习》课程
实验报告

\n", - "
\n", - "
课程名称:深度学习 M502019B
\n", - "
实验题目:Pytorch基本操作实验
\n", - "
学号:25120323
\n", - "
姓名:柯劲帆
\n", - "
授课老师:原继东
\n", - "
报告日期:2025年7月28日
\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "a4e12268-bad4-44c4-92d5-883624d93e25", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Pytorch version: 2.7.1+cu118\n", - "CUDA version: 11.8\n", - "CUDA device count: 1\n", - "CUDA device name: NVIDIA TITAN Xp\n", - "CUDA device capability: (6, 1)\n", - "CUDA device memory: 11.90 GB\n", - "CPU count: 8\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import torch\n", - "from torch.autograd import Variable\n", - "from torch.utils.data import Dataset, DataLoader, random_split\n", - "from torch import nn\n", - "from torchvision import datasets, transforms\n", - "from multiprocessing import cpu_count\n", - "import matplotlib.pyplot as plt\n", - "from tqdm.notebook import tqdm\n", - "from typing import Literal, Union\n", - "\n", - "print('Pytorch version:',torch.__version__)\n", - "if not torch.cuda.is_available():\n", - " print('CUDA is_available:', torch.cuda.is_available())\n", - "else:\n", - " print('CUDA version:', torch.version.cuda)\n", - " print('CUDA device count:', torch.cuda.device_count())\n", - " print('CUDA device name:', torch.cuda.get_device_name())\n", - " print('CUDA device capability:', torch.cuda.get_device_capability())\n", - " print('CUDA device memory:', f'{torch.cuda.get_device_properties(0).total_memory/1024/1024/1024:.2f}', 'GB')\n", - "print('CPU count:', cpu_count())\n", - "\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - "seed = 42\n", - "np.random.seed(seed)\n", - "torch.manual_seed(seed)\n", - "torch.cuda.manual_seed(seed)" - ] - }, - { - "cell_type": "markdown", - "id": "59a43d35-56ac-4ade-995d-1c6fcbcd1262", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "# 一、Pytorch基本操作考察\n", - "## 题目1\n", - "**使用 𝐓𝐞𝐧𝐬𝐨𝐫 初始化一个 𝟏×𝟑 的矩阵 𝑴 和一个 𝟐×𝟏 的矩阵 
𝑵,对两矩阵进行减法操作(要求实现三种不同的形式),给出结果并分析三种方式的不同(如果出现报错,分析报错的原因),同时需要指出在计算过程中发生了什么。**" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "79ea46db-cf49-436c-9b5b-c6562d0da9e2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "M矩阵:\n", - "tensor([[1., 2., 3.]], device='cuda:0')\n", - "N矩阵:\n", - "tensor([[4.],\n", - " [5.]], device='cuda:0')\n", - "运行结果:\n", - "方法1 - 使用PyTorch的减法操作符:\n", - "tensor([[-3., -2., -1.],\n", - " [-4., -3., -2.]], device='cuda:0')\n", - "方法2 - 使用PyTorch的sub函数:\n", - "tensor([[-3., -2., -1.],\n", - " [-4., -3., -2.]], device='cuda:0')\n", - "方法3 - 手动实现广播机制并作差:\n", - "tensor([[-3., -2., -1.],\n", - " [-4., -3., -2.]], device='cuda:0')\n" - ] - } - ], - "source": [ - "M = torch.tensor([[1, 2, 3]], dtype=torch.float32, device=device)\n", - "N = torch.tensor([[4], [5]], dtype=torch.float32, device=device)\n", - "\n", - "# 方法1: 使用PyTorch的减法操作符\n", - "result1 = M - N\n", - "\n", - "# 方法2: 使用PyTorch的sub函数\n", - "result2 = torch.sub(M, N)\n", - "\n", - "# 方法3: 手动实现广播机制并作差\n", - "def my_sub(a: torch.Tensor, b: torch.Tensor):\n", - " if not ((a.size(0) == 1 and b.size(1) == 1) or (a.size(1) == 1 and b.size(0) == 1)):\n", - " raise ValueError(\"输入的张量大小无法满足广播机制的条件。\")\n", - " else:\n", - " target_shape = torch.Size([max(a.size(0), b.size(0)), max(a.size(1), b.size(1))])\n", - " a_broadcasted = a.expand(target_shape)\n", - " b_broadcasted = b.expand(target_shape)\n", - " result = torch.zeros(target_shape, dtype=a_broadcasted.dtype, device=a_broadcasted.device)\n", - " for i in range(target_shape[0]):\n", - " for j in range(target_shape[1]):\n", - " result[i, j] = a_broadcasted[i, j] - b_broadcasted[i, j]\n", - " return result\n", - "result3 = my_sub(M, N)\n", - "\n", - "print(f\"M矩阵:\\n{M}\")\n", - "print(f\"N矩阵:\\n{N}\")\n", - "print(\"运行结果:\")\n", - "print(f\"方法1 - 使用PyTorch的减法操作符:\\n{result1}\")\n", - "print(f\"方法2 - 使用PyTorch的sub函数:\\n{result2}\")\n", - "print(f\"方法3 - 
手动实现广播机制并作差:\\n{result3}\")" - ] - }, - { - "cell_type": "markdown", - "id": "bd9bd5cc-b6da-4dd6-a599-76498bc5247d", - "metadata": {}, - "source": [ - "第1、2、3种减法形式实质是一样的。\n", - "\n", - "步骤如下:\n", - "1. 对A、B两个张量进行广播,将A、B向广播的方向复制,得到两个$\\max(A.size(0), B.size(0))\\times \\max(A.size(1), B.size(1))$的张量;\n", - "2. 对广播后的两个张量作差,尺寸不变。\n", - "\n", - "第1种减法形式和第2种是等价的,前者是后者的符号化表示。\n", - "\n", - "第3种形式是手动实现的,将上述两个步骤分别手动实现了。但是torch.Tensor还内置了其他机制,这里仅模拟了广播和作差。" - ] - }, - { - "cell_type": "markdown", - "id": "2489a3ad-f6ff-4561-bb26-e02654090b98", - "metadata": {}, - "source": [ - "## 题目2\n", - "1. **利用Tensor创建两个大小分别$3\\times 2$和$4\\times 2$的随机数矩阵$P$和$Q$,要求服从均值为$0$,标准差$0.01$为的正态分布;**\n", - "2. **对第二步得到的矩阵$Q$进行形状变换得到$Q$的转置$Q^T$;**\n", - "3. **对上述得到的矩阵$P$和矩阵$Q^T$求矩阵相乘。**" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "41e4ee02-1d05-4101-b3f0-477bac0277fb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "矩阵 P:\n", - "tensor([[ 0.0019, 0.0216],\n", - " [-0.0017, 0.0085],\n", - " [-0.0192, 0.0065]], device='cuda:0')\n", - "矩阵 Q:\n", - "tensor([[ 1.3914e-03, -1.0822e-03],\n", - " [-7.1742e-03, 7.5665e-03],\n", - " [ 3.7149e-03, -1.0049e-02],\n", - " [ 8.2947e-05, 3.2766e-03]], device='cuda:0')\n", - "矩阵 Q^T:\n", - "tensor([[ 1.3914e-03, -7.1742e-03, 3.7149e-03, 8.2947e-05],\n", - " [-1.0822e-03, 7.5665e-03, -1.0049e-02, 3.2766e-03]], device='cuda:0')\n", - "矩阵相乘的结果:\n", - "tensor([[-2.0690e-05, 1.4962e-04, -2.1000e-04, 7.0980e-05],\n", - " [-1.1582e-05, 7.6587e-05, -9.1717e-05, 2.7677e-05],\n", - " [-3.3842e-05, 1.8747e-04, -1.3711e-04, 1.9799e-05]], device='cuda:0')\n" - ] - } - ], - "source": [ - "P = torch.normal(mean=0, std=0.01, size=(3, 2), device=device)\n", - "Q = torch.normal(mean=0, std=0.01, size=(4, 2), device=device)\n", - "\n", - "print(\"矩阵 P:\")\n", - "print(P)\n", - "print(\"矩阵 Q:\")\n", - "print(Q)\n", - "\n", - "# 对矩阵Q进行转置操作,得到矩阵Q的转置Q^T\n", - "QT = Q.T\n", - "print(f\"矩阵 Q^T:\\n{QT}\")\n", - 
"\n", - "# 计算矩阵P和矩阵Q^T的矩阵相乘\n", - "print(f\"矩阵相乘的结果:\\n{torch.matmul(P, QT)}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cea9cb6d-adde-4e08-b9f2-8c417abf4231", - "metadata": {}, - "source": [ - "## 题目3\n", - "**给定公式$ y_3=y_1+y_2=𝑥^2+𝑥^3$,且$x=1$。利用学习所得到的Tensor的相关知识,求$y_3$对$x$的梯度,即$\\frac{dy_3}{dx}$。**" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "951512cd-d915-4d04-959f-eb99d1971e2d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "仅通过y_1传递的梯度: 2.0\n", - "仅通过y_2传递的梯度: 3.0\n", - "dy_3/dx: 5.0\n" - ] - } - ], - "source": [ - "x = torch.tensor(1.0, requires_grad=True, device=device)\n", - "\n", - "y_1 = x ** 2\n", - "with torch.no_grad():\n", - " y_2 = x ** 3\n", - "y_3 = y_1 + y_2\n", - "y_3.backward()\n", - "print(\"仅通过y_1传递的梯度: \", x.grad.item())\n", - "\n", - "x.grad.data.zero_()\n", - "with torch.no_grad():\n", - " y_1 = x ** 2\n", - "y_2 = x ** 3\n", - "y_3 = y_1 + y_2\n", - "y_3.backward()\n", - "print(\"仅通过y_2传递的梯度: \", x.grad.item())\n", - "\n", - "x.grad.data.zero_()\n", - "y_1 = x ** 2\n", - "y_2 = x ** 3\n", - "y_3 = y_1 + y_2\n", - "y_3.backward()\n", - "\n", - "print(\"dy_3/dx: \", x.grad.item())" - ] - }, - { - "cell_type": "markdown", - "id": "3269dbf6-889a-49eb-8094-1e588e1a6c30", - "metadata": {}, - "source": [ - "# 二、动手实现logistic回归\n", - "## 题目1\n", - "**要求动手从0实现 logistic 回归(只借助Tensor和Numpy相关的库)在人工构造的数据集上进行训练和测试,并从loss以及训练集上的准确率等多个角度对结果进行分析(可借助nn.BCELoss或nn.BCEWithLogitsLoss作为损失函数,从零实现二元交叉熵为选作)**" - ] - }, - { - "cell_type": "markdown", - "id": "bcd12aa9-f187-4d88-8c59-af6d16107edb", - "metadata": {}, - "source": [ - "给定预测输出$ \\hat{y} $和目标标签$ y$(通常是0或1),BCELoss的计算公式如下:\n", - "$$\n", - " \\text{BCELoss}(\\hat{y}, y) = -\\frac{1}{N} \\sum_{i=1}^{N} \\left(y_i \\cdot \\log(\\hat{y}_i) + (1 - y_i) \\cdot \\log(1 - \\hat{y}_i)\\right) \n", - "$$\n", - "其中,$N $是样本数量,$\\hat{y}_i $表示模型的预测概率向量中的第$ i $个元素,$y_i $表示实际的目标标签中的第$ i $个元素。在二分类问题中,$y_i 
$通常是0或1。这个公式表示对所有样本的二分类交叉熵损失进行了求和并取平均。\n", - "\n", - "因此BCELoss的手动实现如下。" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "e31b86ec-4114-48dd-8d73-fe4e0686419a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "输入:\n", - "tensor([0.6900], device='cuda:0')\n", - "标签:\n", - "tensor([1.], device='cuda:0')\n", - "My_BCELoss损失值: 0.37110066413879395\n", - "nn.BCELoss损失值: 0.37110066413879395\n" - ] - } - ], - "source": [ - "class My_BCELoss:\n", - " def __call__(self, prediction: torch.Tensor, target: torch.Tensor):\n", - " eps = 1e-9\n", - " loss = -torch.mean(target * torch.log(prediction + eps) + (1 - target) * torch.log(1 - prediction + eps))\n", - " return loss\n", - "\n", - "\n", - "# 测试\n", - "prediction = torch.sigmoid(torch.tensor([0.8], device=device))\n", - "target = torch.tensor([1.0], device=device)\n", - "print(f\"输入:\\n{prediction}\")\n", - "print(f\"标签:\\n{target}\")\n", - "\n", - "my_bce_loss = My_BCELoss()\n", - "my_loss = my_bce_loss(prediction, target)\n", - "print(\"My_BCELoss损失值:\", my_loss.item())\n", - "\n", - "nn_bce_loss = nn.BCELoss()\n", - "nn_loss = nn_bce_loss(prediction, target)\n", - "print(\"nn.BCELoss损失值:\", nn_loss.item())" - ] - }, - { - "cell_type": "markdown", - "id": "345b0300-8808-4c43-9bf9-05a7e6e1f5af", - "metadata": {}, - "source": [ - "Optimizer的实现较为简单。\n", - "\n", - "主要实现:\n", - "- 传入参数:`__init__()`\n", - "- 对传入的参数进行更新:`step()`\n", - "- 清空传入参数存储的梯度:`zero_grad()`\n", - "\n", - "但是有一点需要注意,就是需要将传进来的`params`参数转化为`list`类型。因为`nn.Module`的`parameters()`方法会以``的类型返回模型的参数,但是该类型变量无法像`list`一样使用`for`循环遍历。" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "0297066c-9fc1-448d-bdcb-29a6f1519117", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "x的初始值: 1.0\n", - "学习率: 0.1\n", - "y.backward()之后,x的梯度: 2.0\n", - "optimizer_test.step()之后,x的值: 0.800000011920929\n", - "optimizer_test.zero_grad()之后,x的梯度: 0.0\n" - ] - 
} - ], - "source": [ - "class My_Optimizer:\n", - " def __init__(self, params: list[torch.Tensor], lr: float):\n", - " self.params = list(params)\n", - " self.lr = lr\n", - "\n", - " def step(self):\n", - " for param in self.params:\n", - " if param.grad is not None:\n", - " param.data = param.data - self.lr * param.grad.data\n", - "\n", - " def zero_grad(self):\n", - " for param in self.params:\n", - " if param.grad is not None:\n", - " param.grad.data.zero_()\n", - "\n", - "\n", - "# 测试\n", - "x = torch.tensor(1.0, requires_grad=True, device=device)\n", - "print(\"x的初始值: \", x.item())\n", - "\n", - "optimizer_test = My_Optimizer([x], lr=0.1)\n", - "print(\"学习率: \", optimizer_test.lr)\n", - "\n", - "y = x ** 2\n", - "y.backward()\n", - "print(\"y.backward()之后,x的梯度: \", x.grad.item())\n", - "\n", - "optimizer_test.step()\n", - "print(\"optimizer_test.step()之后,x的值: \", x.item())\n", - "\n", - "optimizer_test.zero_grad()\n", - "print(\"optimizer_test.zero_grad()之后,x的梯度: \", x.grad.item())" - ] - }, - { - "cell_type": "markdown", - "id": "8cbc476a-2438-4d0d-854a-4cdd2f726363", - "metadata": {}, - "source": [ - "接下来实现Logistic回归的Trainer,包括训练流程和画图。\n", - "\n", - "训练进行如下步骤:\n", - "1. 定义模型、数据集、损失函数、优化器和其他超参数\n", - "2. 训练\n", - " 1. 从训练dataloader中获取批量数据\n", - " 2. 传入模型\n", - " 3. 使用损失函数计算与ground_truth的损失\n", - " 4. 使用优化器进行反向传播\n", - " 5. 
循环以上步骤" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "d28d5245-bb60-4baf-be54-8c4944ec9180", - "metadata": {}, - "outputs": [], - "source": [ - "class LogisticTrainer():\n", - " def __init__(\n", - " self,\n", - " model,\n", - " dataset: Union[Dataset, DataLoader],\n", - " optimizer: Literal['torch', 'manual'],\n", - " criterion: Literal['torch', 'manual'],\n", - " learning_rate: float,\n", - " num_epochs: int,\n", - " batch_size: int,\n", - " ):\n", - " self.model = model\n", - " self.learning_rate = learning_rate\n", - " self.num_epochs = num_epochs\n", - " self.batch_size = batch_size\n", - "\n", - " if isinstance(dataset, Dataset):\n", - " self.dataloader = DataLoader(\n", - " dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=cpu_count()\n", - " )\n", - " else:\n", - " self.dataloader = dataset\n", - "\n", - " if optimizer == 'torch':\n", - " self.optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n", - " else:\n", - " self.optimizer = My_Optimizer(model.parameters(), lr=learning_rate)\n", - "\n", - " if criterion == 'torch':\n", - " self.criterion = nn.BCELoss()\n", - " else:\n", - " self.criterion = My_BCELoss()\n", - "\n", - " def train(self):\n", - " loss_curve = []\n", - " step = 0\n", - " total_train_steps = self.num_epochs * len(self.dataloader)\n", - " num_sample_per_epoch = len(self.dataloader) * self.batch_size\n", - " with tqdm(total=total_train_steps) as pbar:\n", - " for epoch in range(self.num_epochs):\n", - " total_epoch_loss = 0\n", - " total_epoch_acc = 0\n", - " for x, targets in self.dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.float32)\n", - "\n", - " self.optimizer.zero_grad()\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_epoch_loss += loss.item()\n", - " loss_curve.append(loss.item())\n", - " \n", - " preds = (output >= 0.5).float()\n", - " total_epoch_acc += (preds 
== targets).float().sum().item()\n", - " \n", - " loss.backward()\n", - " self.optimizer.step()\n", - "\n", - " step += 1\n", - " pbar.update(1)\n", - "\n", - " log_info = {\n", - " 'Epoch': f'{epoch + 1}/{self.num_epochs}',\n", - " 'Total Loss': f'{total_epoch_loss:.2f}',\n", - " 'Avg Acc': f'{total_epoch_acc / num_sample_per_epoch:.2%}'\n", - " }\n", - " print(log_info)\n", - " \n", - " self.plot_results(loss_curve)\n", - " \n", - " def plot_results(self, loss_curve):\n", - " fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n", - "\n", - " axes[0].plot(loss_curve, label='Training Loss')\n", - " axes[0].set_xlabel('Step')\n", - " axes[0].set_ylabel('Loss')\n", - " axes[0].set_title('Loss Curve')\n", - " axes[0].legend()\n", - " axes[0].grid(True)\n", - "\n", - " x, label = next(iter(self.dataloader))\n", - " inputs = x.cpu().numpy()\n", - " labels = label.cpu().numpy()\n", - " x_data = inputs[:, 0]\n", - " y_data = inputs[:, 1]\n", - " \n", - " w = self.model.linear.weight.detach().cpu().numpy()[0]\n", - " w_x, w_y = w[0], w[1]\n", - " b = self.model.linear.bias.detach().cpu().numpy()[0]\n", - " x_vals = np.linspace(-1, 1, 100)\n", - " y_model = - (w_x * x_vals + b) / w_y\n", - " y_target = 4 - 3 * x_vals\n", - " \n", - " axes[1].plot(x_vals, y_target, label='Target Line: y=4-3x', linestyle='--', color='green')\n", - " axes[1].plot(x_vals, y_model, label='Model Decision Boundary', color='red')\n", - "\n", - " label_0_shown, label_1_shown = False, False\n", - " for i in range(min(100, len(x_data))):\n", - " label_val = int(labels[i].item())\n", - " if label_val == 1:\n", - " color = 'blue'\n", - " label_name = 'Label=1' if not label_1_shown else \"\"\n", - " label_1_shown = True\n", - " else:\n", - " color = 'orange'\n", - " label_name = 'Label=0' if not label_0_shown else \"\"\n", - " label_0_shown = True\n", - " axes[1].scatter(x_data[i], y_data[i], color=color, label=label_name)\n", - " \n", - " axes[1].set_xlabel('x')\n", - " axes[1].set_ylabel('y')\n", - " 
axes[1].set_title('Fitted Line vs Target Line')\n", - " axes[1].legend()\n", - " axes[1].grid(True)\n", - " \n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "6ab83528-a88b-4d66-b0c9-b1315cf75c22", - "metadata": {}, - "source": [ - "线性层主要有一个权重(weight)和一个偏置(bias)。\n", - "线性层的数学公式如下:\n", - "$$\n", - "x:=x \\times weight^T+bias\n", - "$$\n", - "因此代码实现如下:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "8e18695a-d8c5-4f77-8b5c-de40d9240fb9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "输入:\n", - "tensor([[1.],\n", - " [2.]], device='cuda:0', requires_grad=True)\n", - "权重:\n", - "tensor([[ 0.8815],\n", - " [-0.7336],\n", - " [ 0.8692]], device='cuda:0')\n", - "偏置:\n", - "tensor([0.1872, 0.7388, 0.1354], device='cuda:0')\n", - "My_Linear输出:\n", - "tensor([[ 1.0687, 0.0052, 1.0046],\n", - " [ 1.9502, -0.7284, 1.8738]], device='cuda:0', grad_fn=)\n", - "nn.Linear输出:\n", - "tensor([[ 1.0687, 0.0052, 1.0046],\n", - " [ 1.9502, -0.7284, 1.8738]], device='cuda:0',\n", - " grad_fn=)\n" - ] - } - ], - "source": [ - "class My_Linear:\n", - " def __init__(self, input_feature: int, output_feature: int):\n", - " self.weight = torch.randn((output_feature, input_feature), requires_grad=True, dtype=torch.float32)\n", - " self.bias = torch.zeros(1, requires_grad=True, dtype=torch.float32)\n", - " self.params = [self.weight, self.bias]\n", - "\n", - " def __call__(self, x: torch.Tensor):\n", - " return self.forward(x)\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " x = torch.matmul(x, self.weight.T) + self.bias\n", - " return x\n", - "\n", - " def to(self, device: str):\n", - " for param in self.params:\n", - " param.data = param.data.to(device=device)\n", - " return self\n", - "\n", - " def parameters(self):\n", - " return self.params\n", - "\n", - " \n", - "# 测试\n", - "my_linear = My_Linear(1, 3).to(device)\n", - "nn_linear = nn.Linear(1, 
3).to(device)\n", - "my_linear.weight = nn_linear.weight.clone().requires_grad_()\n", - "my_linear.bias = nn_linear.bias.clone().requires_grad_()\n", - "x = torch.tensor([[1.], [2.]], requires_grad=True, device=device)\n", - "print(f\"输入:\\n{x}\")\n", - "print(f\"权重:\\n{my_linear.weight.data}\")\n", - "print(f\"偏置:\\n{my_linear.bias.data}\")\n", - "y_my_linear = my_linear(x)\n", - "print(f\"My_Linear输出:\\n{y_my_linear}\")\n", - "y_nn_linear = nn_linear(x)\n", - "print(f\"nn.Linear输出:\\n{y_nn_linear}\")" - ] - }, - { - "cell_type": "markdown", - "id": "5ff813cc-c1f0-4c73-a3e8-d6796ef5d366", - "metadata": {}, - "source": [ - "手动实现logistic回归模型。\n", - "\n", - "模型很简单,主要由一个线性层和一个sigmoid层组成。\n", - "\n", - "Sigmoid函数(又称为 Logistic函数)是一种常用的激活函数,通常用于神经网络的输出层或隐藏层,其作用是将输入的实数值压缩到一个范围在0和1之间的数值:\n", - "\n", - "$$\n", - "\\sigma(x) = {(1 + e^{-x})}^{-1}\n", - "$$\n", - "\n", - "由于当$x << 0$时,$e^{-x}$较大,进而导致${(1 + e^{-x})}^{-1}$产生数值下溢。因此对Sigmoid函数公式进行优化:\n", - "$$\n", - "\\sigma(x) = \n", - "\\begin{cases}\n", - "\\frac{1}{1 + e^{-x}}, & \\text{if } x \\geq 0 \\\\\n", - "\\frac{e^{x}}{1 + e^{x}}, & \\text{if } x < 0\n", - "\\end{cases}\n", - "$$" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "e7de7e4b-a084-4793-812e-46e8550ecd8d", - "metadata": {}, - "outputs": [], - "source": [ - "def my_sigmoid(x: torch.Tensor):\n", - " z = torch.exp(-x.abs())\n", - " return torch.where(x >= 0, 1 / (1 + z), z / (1 + z))\n", - "\n", - "\n", - "class Model_2_1():\n", - " def __init__(self):\n", - " self.linear = My_Linear(2, 1)\n", - " self.params = self.linear.params\n", - "\n", - " def __call__(self, x):\n", - " return self.forward(x)\n", - "\n", - " def forward(self, x):\n", - " x = self.linear(x)\n", - " x = my_sigmoid(x)\n", - " return x\n", - "\n", - " def to(self, device: str):\n", - " for param in self.params:\n", - " param.data = param.data.to(device=device)\n", - " return self\n", - "\n", - " def parameters(self):\n", - " return self.params" - ] - }, - { - "cell_type": 
"markdown", - "id": "e14acea9-e5ef-4c24-aea9-329647224ce1", - "metadata": {}, - "source": [ - "人工随机构造数据集。\n", - "\n", - "我的y设置为$4-3\\times x + noise$,noise为随机噪声。\n", - "\n", - "生成完x和y后判断给出ground truth,并写好DataLoader访问数据集的接口`__getitem__()`。" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "c39fbafb-62e4-4b8c-9d65-6718d25f2970", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "测试数据集大小:1000000\n", - "测试数据集第0对数据:\n", - "x_0 = tensor([-0.2509, 3.0322])\n", - "y_0 = tensor([0.])\n" - ] - } - ], - "source": [ - "class My_Dataset(Dataset):\n", - " def __init__(self, data_size=1000000):\n", - " x = np.random.uniform(low=-1, high=1, size=(data_size, 1))\n", - " noise = np.random.normal(loc=0, scale=1, size=(data_size, 1))\n", - " y = 4 - 3 * x + noise\n", - " labels = (y > 4 - 3 * x).astype(np.float32)\n", - " self.inputs = torch.tensor(np.concatenate([x, y], axis=1), dtype=torch.float32)\n", - " self.labels = torch.tensor(labels, dtype=torch.float32)\n", - "\n", - " def __len__(self):\n", - " return self.inputs.shape[0]\n", - "\n", - " def __getitem__(self, index):\n", - " return self.inputs[index], self.labels[index]\n", - "\n", - "\n", - "# 测试,并后面的训练创建变量\n", - "dataset = My_Dataset()\n", - "dataset_size = len(dataset)\n", - "print(f\"测试数据集大小:{dataset_size}\")\n", - "x0, y0 = dataset[0]\n", - "print(f\"测试数据集第0对数据:\")\n", - "print(f\"x_0 = {x0}\")\n", - "print(f\"y_0 = {y0}\")" - ] - }, - { - "cell_type": "markdown", - "id": "957a76a2-b306-47a8-912e-8fbf00cdfd42", - "metadata": {}, - "source": [ - "训练Logistic回归模型。" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "5612661e-2809-4d46-96c2-33ee9f44116d", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "131f3f0073f247b6901a50dde366d2c2", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4885 [00:00" - ] - }, - "metadata": {}, - "output_type": 
"display_data" - } - ], - "source": [ - "hyper_params = {\n", - " 'learning_rate': 5.0e-2,\n", - " 'num_epochs': 5,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "model = Model_2_1().to(device)\n", - "trainer = LogisticTrainer(model=model, dataset=dataset, optimizer='manual', criterion='manual', **hyper_params)\n", - "trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "9e416582-a30d-4084-acc6-6e05f80a6aff", - "metadata": {}, - "source": [ - "## 题目2\n", - "**利用 torch.nn 实现 logistic 回归在人工构造的数据集上进行训练和测试,并对结果进行分析,并从loss以及训练集上的准确率等多个角度对结果进行分析**" - ] - }, - { - "cell_type": "markdown", - "id": "0460d125-7d03-44fe-845c-c4d13792e241", - "metadata": {}, - "source": [ - "使用torch.nn实现模型。\n", - "\n", - "将之前的Model_2_1中的手动实现函数改为torch.nn内置函数即可,再加上继承nn.Module以使用torch.nn内置模型模板特性。" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "fa121afd-a1af-4193-9b54-68041e0ed068", - "metadata": {}, - "outputs": [], - "source": [ - "class Model_2_2(nn.Module):\n", - " def __init__(self):\n", - " super(Model_2_2, self).__init__()\n", - " self.linear = nn.Linear(2, 1, dtype=torch.float32)\n", - "\n", - " def forward(self, x):\n", - " x = self.linear(x)\n", - " x = torch.sigmoid(x)\n", - " return x" - ] - }, - { - "cell_type": "markdown", - "id": "176eee7e-4e3d-470e-8af2-8761bca039f8", - "metadata": {}, - "source": [ - "训练与测试过程与之前手动实现的一致。" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "93b0fdb6-be8b-4663-b59e-05ed19a9ea09", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9520af64d1cd4867850624e0605f3745", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4885 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "hyper_params = {\n", - " 'learning_rate': 5.0e-2,\n", - " 'num_epochs': 5,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "model = Model_2_2().to(device)\n", - "trainer = 
LogisticTrainer(model=model, dataset=dataset, optimizer='torch', criterion='torch', **hyper_params)\n", - "trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "e6bff679-f8d2-46cc-bdcb-82af7dab38b3", - "metadata": {}, - "source": [ - "对比发现,手动实现的损失函数和优化器与torch.nn的内置损失函数和优化器相比,表现差不多。" - ] - }, - { - "cell_type": "markdown", - "id": "ef41d7fa-c2bf-4024-833b-60af0a87043a", - "metadata": {}, - "source": [ - "# 三、动手实现softmax回归\n", - "\n", - "## 问题1\n", - "\n", - "**要求动手从0实现softmax回归(只借助Tensor和Numpy相关的库)在Fashion-MNIST数据集上进行训练和测试,并从loss、训练集以及测试集上的准确率等多个角度对结果进行分析(要求从零实现交叉熵损失函数)**" - ] - }, - { - "cell_type": "markdown", - "id": "902603a6-bfb9-4ce3-bd0d-b00cebb1d3cb", - "metadata": {}, - "source": [ - "手动实现CrossEntropyLoss。\n", - "\n", - "CrossEntropyLoss由一个log_softmax和一个nll_loss组成。\n", - "\n", - "softmax的数学表达式如下:\n", - "$$\n", - "\\text{softmax}(x_i) = \\frac{e^{x_i}}{\\sum_{j=1}^{N} e^{x_j}} = \\frac{e^{x_i - \\text{max}(x)}}{\\sum_{j=1}^{N} e^{x_j - \\text{max}(x)}} \n", - "$$\n", - "log_softmax即为$\\log(\\text{softmax}(x))$,但可以进一步优化:\n", - "$$\n", - "\\text{logsoftmax}(x_i) = \\log{\\frac{e^{x_i - \\text{max}(x)}}{\\sum_{j=1}^{N} e^{x_j - \\text{max}(x)}}} = x_i - \\text{max}(x) - \\log{\\sum_{j=1}^{N} e^{x_j - \\text{max}(x)}}\n", - "$$\n", - "\n", - "CrossEntropyLoss的数学表达式如下:\n", - "$$\n", - "\\text{CrossEntropyLoss}(x, \\hat{x}) = -\\frac{1}{N} \\sum_{i=1}^{N} \\hat{x}_i \\cdot \\log(\\text{softmax}(x_i)) \n", - "$$\n", - "\n", - "故代码如下:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "759a3bb2-b5f4-4ea5-a2d7-15f0c4cdd14b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "输入:\n", - "tensor([[-0.1808, -0.6778, -0.5920, -0.6382, -1.9187],\n", - " [-0.6441, -0.6061, -0.1425, 0.9727, 2.0038],\n", - " [ 0.6622, 0.5332, 2.7489, -0.3841, -1.9623]], requires_grad=True)\n", - "标签:\n", - "tensor([2, 0, 1])\n", - "My_CrossEntropyLoss损失值: 2.377387762069702\n", - "nn.CrossEntropyLoss损失值: 
2.377387762069702\n" - ] - } - ], - "source": [ - "class My_Softmax:\n", - " def __init__(self, dim: int):\n", - " self.dim = dim\n", - " def __call__(self, x: torch.Tensor):\n", - " max_x = torch.max(x, dim=self.dim, keepdim=True).values\n", - " exp_x = torch.exp(x - max_x)\n", - " return exp_x / torch.sum(exp_x, dim=self.dim, keepdim=True)\n", - "\n", - "def my_logsoftmax(x: torch.Tensor):\n", - " max_x = torch.max(x, dim=1, keepdim=True).values\n", - " exp_x = torch.exp(x - max_x)\n", - " return x - max_x - torch.log(torch.sum(exp_x, dim=1, keepdim=True))\n", - "\n", - "class My_CrossEntropyLoss:\n", - " def __call__(\n", - " self, \n", - " predictions: torch.Tensor, \n", - " targets: torch.Tensor, \n", - " reduction: Literal[\"mean\", \"sum\"] = \"mean\"\n", - " ):\n", - " log_probs = my_logsoftmax(predictions)\n", - " \n", - " if len(predictions.shape) == len(targets.shape) + 1:\n", - " nll_loss = -log_probs.gather(1, targets.unsqueeze(-1)).squeeze()\n", - " else:\n", - " nll_loss = -torch.sum(targets * log_probs, dim=1)\n", - " \n", - " if reduction == \"mean\": \n", - " return torch.mean(nll_loss)\n", - " else: \n", - " return torch.sum(nll_loss)\n", - "\n", - " \n", - "# 测试\n", - "input = torch.randn(3, 5, requires_grad=True)\n", - "target = torch.randn(3, 5).softmax(dim=1).argmax(1)\n", - "print(f\"输入:\\n{input}\")\n", - "print(f\"标签:\\n{target}\")\n", - "\n", - "my_crossentropyloss = My_CrossEntropyLoss()\n", - "my_loss = my_crossentropyloss(input, target)\n", - "print(\"My_CrossEntropyLoss损失值:\", my_loss.item())\n", - "\n", - "nn_crossentropyloss = nn.CrossEntropyLoss()\n", - "nn_loss = nn_crossentropyloss(input, target)\n", - "print(\"nn.CrossEntropyLoss损失值:\", nn_loss.item())" - ] - }, - { - "cell_type": "markdown", - "id": "92c224a3-8c27-4392-9017-aa526030a0a6", - "metadata": {}, - "source": [ - "接下来实现Softmax回归的Trainer,包括训练流程、测试和画图。\n", - "\n", - "训练softmax回归模型,进行如下步骤:\n", - "1. 定义模型、数据集、损失函数、优化器和其他超参数\n", - "2. 训练\n", - " 1. 
从训练dataloader中获取批量数据\n", - " 2. 传入模型\n", - " 3. 使用损失函数计算与ground_truth的损失\n", - " 4. 使用优化器进行反向传播\n", - " 5. 循环以上步骤\n", - "3. 验证及测试\n", - " 1. 从验证或测试dataloader中获取批量数据\n", - " 2. 传入模型,验证时需要将模型输出与ground_truth进行比较得计算loss\n", - " 3. 将预测值与ground_truth进行比较,得出正确率\n", - " 4. 对整个训练集统计正确率,从而分析训练效果" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "159fc93c-fa21-4a94-b460-dda9e8557a43", - "metadata": {}, - "outputs": [], - "source": [ - "class SoftmaxTrainer():\n", - " def __init__(\n", - " self,\n", - " model,\n", - " train_dataset: Union[Dataset, DataLoader],\n", - " eval_dataset: Union[Dataset, DataLoader],\n", - " test_dataset: Union[Dataset, DataLoader],\n", - " optimizer: Literal['torch', 'manual'],\n", - " criterion: Literal['torch', 'manual'],\n", - " learning_rate: float,\n", - " num_epochs: int,\n", - " batch_size: int,\n", - " ):\n", - " self.model = model\n", - " self.learning_rate = learning_rate\n", - " self.num_epochs = num_epochs\n", - " self.batch_size = batch_size\n", - "\n", - " if isinstance(train_dataset, Dataset):\n", - " self.train_dataloader = DataLoader(\n", - " dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=cpu_count()\n", - " )\n", - " else:\n", - " self.train_dataloader = train_dataset\n", - " if isinstance(eval_dataset, Dataset):\n", - " self.eval_dataloader = DataLoader(\n", - " dataset=eval_dataset, batch_size=batch_size, shuffle=True, num_workers=cpu_count()\n", - " )\n", - " else:\n", - " self.eval_dataloader = eval_dataset\n", - " if isinstance(test_dataset, Dataset):\n", - " self.test_dataloader = DataLoader(\n", - " dataset=test_dataset, batch_size=batch_size, shuffle=True, num_workers=cpu_count()\n", - " )\n", - " else:\n", - " self.test_dataloader = test_dataset\n", - "\n", - " if optimizer == 'torch':\n", - " self.optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n", - " else:\n", - " self.optimizer = My_Optimizer(model.parameters(), lr=learning_rate)\n", - "\n", - " if criterion 
== 'torch':\n", - " self.criterion = nn.CrossEntropyLoss()\n", - " self.softmax = nn.Softmax(dim=1)\n", - " else:\n", - " self.criterion = My_CrossEntropyLoss()\n", - " self.softmax = My_Softmax(dim=1)\n", - "\n", - " def train(self):\n", - " train_loss_curve = []\n", - " eval_loss_curve = []\n", - " eval_acc_curve = []\n", - " step = 0\n", - " total_train_steps = self.num_epochs * len(self.train_dataloader)\n", - " with tqdm(total=total_train_steps) as pbar:\n", - " for epoch in range(self.num_epochs):\n", - " total_train_loss = 0\n", - " for x, targets in self.train_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.long)\n", - "\n", - " self.optimizer.zero_grad()\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_train_loss += loss.item()\n", - " train_loss_curve.append(loss.item())\n", - " \n", - " loss.backward()\n", - " self.optimizer.step()\n", - " step += 1\n", - " pbar.update(1)\n", - "\n", - " avg_eval_loss, avg_eval_acc = self.eval()\n", - " eval_loss_curve.append(avg_eval_loss)\n", - " eval_acc_curve.append(avg_eval_acc)\n", - " log_info = {\n", - " 'Epoch': f'{epoch + 1}/{self.num_epochs}',\n", - " 'Total Train Loss': f'{total_train_loss:.2f}',\n", - " 'Scaled Total Valid Loss': f'{avg_eval_loss * len(self.train_dataloader):.2f}',\n", - " 'Avg Valid Acc': f'{avg_eval_acc:.2%}'\n", - " }\n", - " print(log_info)\n", - "\n", - " print('Avg Test Acc:', f'{self.test():.2%}')\n", - " self.plot_results(train_loss_curve, eval_loss_curve, eval_acc_curve)\n", - "\n", - " def eval(self):\n", - " total_eval_loss = 0\n", - " total_eval_acc = 0\n", - " with torch.inference_mode():\n", - " for x, targets in self.eval_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.long)\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_eval_loss += 
loss.item()\n", - " preds = self.softmax(output).argmax(dim=1)\n", - " total_eval_acc += (preds == targets).float().sum().item()\n", - " \n", - " avg_eval_loss = total_eval_loss / len(self.eval_dataloader)\n", - " num_eval_sample = len(self.eval_dataloader) * self.batch_size\n", - " avg_eval_acc = total_eval_acc / num_eval_sample\n", - " return avg_eval_loss, avg_eval_acc\n", - "\n", - " def test(self):\n", - " total_test_acc = 0\n", - " with torch.inference_mode():\n", - " for x, targets in self.test_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.long)\n", - " output = self.model(x)\n", - " preds = self.softmax(output).argmax(dim=1)\n", - " total_test_acc += (preds == targets).float().sum().item()\n", - " num_test_sample = len(self.test_dataloader) * self.batch_size\n", - " avg_test_acc = total_test_acc / num_test_sample\n", - " return avg_test_acc\n", - " \n", - " def plot_results(self, train_loss_curve, eval_loss_curve, eval_acc_curve):\n", - " fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n", - " \n", - " axes[0].plot(train_loss_curve, label='Training Loss', color='blue')\n", - " axes[0].plot(\n", - " np.linspace(len(self.train_dataloader), len(train_loss_curve), len(eval_loss_curve), endpoint=True),\n", - " eval_loss_curve, label='Validation Loss', color='orange'\n", - " )\n", - " axes[0].set_xlabel('Step')\n", - " axes[0].set_ylabel('Loss')\n", - " axes[0].set_title('Loss Curve')\n", - " axes[0].legend()\n", - " axes[0].grid(True)\n", - " \n", - " axes[1].plot(eval_acc_curve, label='Validation Accuracy', color='green', marker='o')\n", - " axes[1].set_xlabel('Epoch')\n", - " axes[1].set_ylabel('Accuracy')\n", - " axes[1].set_title('Validation Accuracy Curve')\n", - " axes[1].legend()\n", - " axes[1].grid(True)\n", - " \n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "dbf78501-f5be-4008-986c-d331d531491f", - "metadata": {}, - "source": [ - 
"手动实现Flatten。\n", - "\n", - "原理很简单,就是把多维的张量拉直成一个向量。" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "74322629-8325-4823-b80f-f28182d577c1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Flatten之前的x:\n", - "tensor([[[1., 2.],\n", - " [3., 4.]],\n", - "\n", - " [[5., 6.],\n", - " [7., 8.]]])\n", - "My_Flatten之后的x:\n", - "tensor([[1., 2., 3., 4.],\n", - " [5., 6., 7., 8.]])\n", - "nn.Flatten之后的x:\n", - "tensor([[1., 2., 3., 4.],\n", - " [5., 6., 7., 8.]])\n" - ] - } - ], - "source": [ - "class My_Flatten:\n", - " def __call__(self, x: torch.Tensor):\n", - " return self.forward(x)\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " x = x.view(x.shape[0], -1)\n", - " return x\n", - "\n", - "\n", - "# 测试\n", - "my_flatten = My_Flatten()\n", - "nn_flatten = nn.Flatten()\n", - "x = torch.tensor(\n", - " [[[1., 2.], [3., 4.]],\n", - " [[5., 6.], [7., 8.]]]\n", - ")\n", - "print(f\"Flatten之前的x:\\n{x}\")\n", - "x_my_flatten = my_flatten(x)\n", - "print(f\"My_Flatten之后的x:\\n{x_my_flatten}\")\n", - "x_nn_flatten = nn_flatten(x)\n", - "print(f\"nn.Flatten之后的x:\\n{x_nn_flatten}\")" - ] - }, - { - "cell_type": "markdown", - "id": "35aee905-ae37-4faa-a7f1-a04cd8579f78", - "metadata": {}, - "source": [ - "手动实现softmax回归模型。\n", - "\n", - "模型很简单,主要由一个Flatten层和一个线性层组成。\n", - "\n", - "Flatten层主要用于将2维的图像展开,直接作为1维的特征量输入网络。" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "bb31a75e-464c-4b94-b927-b219a765e35d", - "metadata": {}, - "outputs": [], - "source": [ - "class Model_3_1:\n", - " def __init__(self, num_classes):\n", - " self.flatten = My_Flatten()\n", - " self.linear = My_Linear(28 * 28, num_classes)\n", - " self.params = self.linear.params\n", - "\n", - " def __call__(self, x: torch.Tensor):\n", - " return self.forward(x)\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " x = self.flatten(x)\n", - " x = self.linear(x)\n", - " return x\n", - "\n", - " def to(self, device: 
str):\n", - " for param in self.params:\n", - " param.data = param.data.to(device=device)\n", - " return self\n", - "\n", - " def parameters(self):\n", - " return self.params" - ] - }, - { - "cell_type": "markdown", - "id": "17e686d1-9c9a-4727-8fdc-9990d348c523", - "metadata": {}, - "source": [ - "训练与测试过程与之前手动实现的几乎一致。由于数据集的变化,对应超参数也进行了调整。\n", - "\n", - "数据集也使用了现成的FashionMNIST数据集,且划分了训练集和测试集。\n", - "\n", - "FashionMNIST数据集直接调用API获取。数据集的image为28*28的单通道灰白图片,label为单个数值标签。" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "02f7d7dc-e2a8-4127-b505-f31993a75131", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train Dataset Size: 59000\n", - "Valid Dataset Size: 1000\n", - "Test Dataset Size: 10000\n", - "A Train Sample:\n", - "\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAIcAAACdCAYAAACeqmv3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjUsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvWftoOwAAAAlwSFlzAAAPYQAAD2EBqD+naQAADiFJREFUeJztnWtsVFX3xp+h1rZURUFbpEJtLWKRJpJWRAJpEaE1ImkToyZeMN4So4kahOgXygejMUo0BhNJvDRGjR8m1XhpMKJFgmKLNlbA1tZykaogpSgoUqjd74d/Zv7dzzl7zkxnOi3v+/wSPqzTc87e57By9jNrr712yBhjIIQPE8a6A2L8IucQTuQcwomcQziRcwgncg7hRM4hnMg5hBM5h3DyX+0c+/btQygUwvPPP5+ye27ZsgWhUAhbtmxJ2T3HK+POORoaGhAKhfDNN9+MdVdGhcbGRtx6660oLi7GxIkTMWvWLKxatQp//PHHWHfNw1lj3YH/NR544AFMmzYNd9xxB2bMmIGdO3diw4YNaGpqQltbG3Jycsa6i1HkHGkmHA6jqqrKOlZeXo6VK1fi7bffxn333Tc2HfNh3A0r8XDq1CmsXbsW5eXlmDRpEnJzc7Fo0SI0Nzc7r3nhhRdQWFiInJwcVFZWYteuXZ5zOjs7cfPNN2Py5MnIzs5GRUUFPvjgg8D+nDhxAp2dnejr6ws8lx0DAOrq6gAAHR0dgdenkzPSOY4dO4ZXX30VVVVVePbZZ7Fu3TocPnwY1dXV+O677zznv/nmm3jppZfw0EMP4cknn8SuXbtw3XXX4dChQ9Fzdu/ejfnz56OjowNPPPEE1q9fj9zcXNTW1uK9996L2Z/W1laUlpZiw4YNI3qegwcPAgAuvPDCEV0/aphxxhtvvGEAmB07djjPGRwcNAMDA9axo0ePmvz8fHPPPfdEj+3du9cAMDk5Oaa3tzd6vKWlxQAwjz32WPTYkiVLTFlZmTl58mT02NDQkFmwYIGZOXNm9Fhzc7MBYJqbmz3H6uvrR/LI5t577zUZGRmmq6trRNePFmfklyMjIwNnn302AGBoaAj9/f0YHBxERUUF2traPOfX1taioKAgas+bNw/XXHMNmpqaAAD9/f34/PPPccstt+D48ePo6+tDX
18fjhw5gurqanR3d+OXX35x9qeqqgrGGKxbty7hZ3nnnXfw2muvYdWqVZg5c2bC148qY+2dTDxfDmOMaWhoMGVlZSYzM9MAiP4rKiqKnhP5cqxdu9Zz/Z133mmysrKMMf//JYn1r62tzRjj/+UYKVu3bjXZ2dmmurranD59Oun7pZoz8tfKW2+9hbvvvhu1tbVYvXo18vLykJGRgWeeeQY9PT0J329oaAgA8Pjjj6O6utr3nJKSkqT6zLS3t2PFihWYM2cOwuEwzjpr/P1XjL8exUE4HEZxcTEaGxsRCoWix+vr633P7+7u9hzr6urCpZdeCgAoLi4GAGRmZuL6669PfYeJnp4e1NTUIC8vD01NTTjnnHNGvc2RcMZqDgAww3KjW1pasH37dt/z33//fUsztLa2oqWlBTfccAMAIC8vD1VVVdi4cSN+++03z/WHDx+O2Z9EfsoePHgQy5Ytw4QJE/DJJ5/goosuCrxmrBi3X47XX38dmzZt8hx/5JFHsHz5cjQ2NqKurg433ngj9u7di1deeQWzZ8/GX3/95bmmpKQECxcuxIMPPoiBgQG8+OKLmDJlCtasWRM95+WXX8bChQtRVlaG+++/H8XFxTh06BC2b9+O3t5etLe3O/va2tqKxYsXo76+PlCU1tTUYM+ePVizZg22bduGbdu2Rf+Wn5+PpUuXxvF20sRYix4mIkhd/w4cOGCGhobM008/bQoLC01WVpaZO3eu+eijj8zKlStNYWFh9F4RQfrcc8+Z9evXm+nTp5usrCyzaNEi097e7mm7p6fH3HXXXWbq1KkmMzPTFBQUmOXLl5twOBw9J9mfsrGerbKyMok3l3pCxmjdivDnjNQcIj3IOYQTOYdwIucQTuQcwomcQziRcwgncUdIh89hjBcicyMRli1b5jmHE5X37Nlj2UGJvTwhxm2yDQCnT5+2bE4jOH78eMw200E84S19OYQTOYdwkrKJtwkTbD+L5EgkwlVXXWXZU6dOtWz+HA8MDFj24sWLPfd89NFHLfvbb7+17NraWsvm6fNwOGzZPGvr12ZDQ4NlT5w4MWYbg4ODls1DX6xJv9FEXw7hRM4hnMg5hJO4p+yDfsqORHMUFhZadn9/v2WXlpbG/Pvff/9t2X5ZXLfffrtlz54927IrKiosm5+Tx39+XV988YWnzXfffdey8/PzLZvfFb+HSKZbhC+//NLTBsP9Dvpv1U9ZkRRyDuFEziGcpExzJDrmAcCKFSssm3/P79+/37KnTJli2Tw2n3vuuZ42Tpw4YdnTpk2L2SeOOUyePNmyh6+vBf5v3S6Tl5cXsw1OgmZdM2nSJMueM2eOZfvpnESR5hBJIecQTuQcwomcQzhJmSAdyT3YjixPjMCBNA5y8fJDzqMAvAKT+fPPPy07aN0qt3Hq1CnPORzk4jpf559/vmWzgJ0+fbpl//jjj5btV5UoUSRIRVLIOYQTOYdwklbNwcEcDgZx0IsDUCdPnrTsoqIiy/Yb//kYT3JxwlBWVpZl//PPP5YdpCf8rmHtxBOIrGN+/fVXy87MzLTsf//919MmP0cQ0hwiKeQcwomcQzhJq+bIzs627GuvvdayOebgN7YOh8s/sh4AgKNHj8bsA0/W8WQeJzXzOhfWKEDwu+Lkn4svvtiyWZNcffXVlr1161bPPQ8cOBCzTUaaQySFnEM4kXMIJ6OmOfi3ORCcOFNeXm7Zvb29ls3jf6R+aIRIyevhcJyDNQXDf+c4Bscw4nkv/C4ihfAj8HPNnTvXsjdv3hzYRqJIc4ikkHMIJ3IO4SStcY6bbrrJsnnOgWtpcJyDx3+Oc4xk8TbX3wjK/4iHoGRrrunBsRcup82xlBkzZnja5AXifrktsfrkh74cwomcQziRcwgnadUcDM8p8CKnBQsWWPbq1astm+McfrAOYR3DGoPjHDxfw5v0cdzDrw3WFBy3qKmpsWzWFD/88INlc5wEAH766SfPsVhIc4ikkHMIJ3IO4WTUd
mrifFHAu0B49+7dlt3Z2WnZQbkXvG7Fr3gLawqOlbCW4rkYtllPxBNbYZ1y2WWXWTavY/nqq68sm9ep+G0WyO+K821Hgr4cwomcQziRcwgncg7hZNSCYCyQAO+EE+/dfvnll1s2V9H7+uuvLZuFGQteIDhJmeFqw5xAxPfzqybEIpYXKfE9r7jiCstesmSJZbNQ5yqKgFeMB1U9VhBMJIWcQziRcwgnKdMcPP7zjgeAN2GYF/ewZujq6rLsefPmWfYFF1xg2X5jMSfvcuIMB8l4cTfrA9YTfgupWLdw4I0Tc3gR9MMPP2zZH374oWXv27fP02aiSHOIpJBzCCdyDuEkZZqDx/L58+d7zuHYR9COBU899ZRlc3KQ30TbeCQ3N9eyWSuxFrvtttti3q+jo8NzjAvdtbS0WDZrK2kOkRRyDuFEziGcpCzZh3+rx1PZnwuOcBwjCN51yW8hNY/FnATDcyMcx+D4DT9n0MJswDsf8/3338c8n+M9rFF4jgnwT3ROFn05hBM5h3Ai5xBOUqY5OFeDd5cGvON3ZWWlZfM8CMMF41mz+CXecgyB5054boR3duLCK7xA2a9NnlvhBGPWFNzmjh07LJuTtf0WNXE/g+aQ4kFfDuFEziGcyDmEk5RpDs4x4GKugLcoPdtBm8xwfkdQgRLAO5/D4z3/nXMvghYtsV4AvLEPLpQbNP6zDuKcUr/n5jgHx1J4B8p40JdDOJFzCCdyDuEkZZqDC6lwTAIALrnkEsu+8sorLTtoPoaLt/LYHk8hFf79z8XZmKDiLxzDALw6prS01LJZn/G6FP77pk2bLJsXWgPegr9aSC1GFTmHcCLnEE7kHMJJygQpJ8n4BXo4YPTzzz9bNu+8zLAQS3RHRD844MR2ULKP305NLAY5cdpvR8nhsMhdunSpZfMO1n5t8kJqvwShIPTlEE7kHMKJnEM4SZnm4MklTuwFgqsDc7IPj63xVCxmeHxnjcB6gINevDibn8GvSA3rLw7OcVLzp59+atkcFPv4448t229R0++//27ZXGlxJOjLIZzIOYQTOYdwkjLNwRNtXPwN8C6EDloQxOMma5AjR45Ytl8SDMdbEq1QzAuOWaP4PQMnGLPmmDVrlmUHJTFznIMrIAPeatDd3d2WzYlS8aAvh3Ai5xBO5BzCSco0By8w8vudzXMGvKiJ4QXEPPfC471fbIXHfy5kF1SUNiiJOZ75HU625qIzQW189tlnlu2nH1h/9ff3B/YrCH05hBM5h3Ai5xBOUqY5ysrKLJuL3gNAUVGRZfMC4Y0bN1o2JxTzjkY8zvoVb+E8B45jcAyCz+fEXS7+5je3EqRzuA3uAxex5ev9CuMeO3bMsjnOkejukYC+HCIGcg7hRM4hnKR1R+rzzjvPslkjpCIHIQjOCeX8Du4jF3sJyikFvBoizlfspK6uzrI5dwPwaqn9+/dbNsdWVKRWJIWcQziRcwgnadUcQXAOCMdKOO+BiadgLMMxA87/4IXWHOfw2wCQXylfw3MtvJ5n8+bN7g6nCGkOkRRyDuFEziGcyDmEk7QKUr4HN82Ckye1OCDFE1x+gpQDUnxO0I7TQa/Hr01eCBVUkZBFLT9nskE0PyRIRVLIOYQTOYdwkrJkn3gIGucKCgosu6SkJOb5PDb7jf+sKdgOWqTEeoGTgVmjAP4FXYYTtAv2zp07Ldtvp+10oC+HcCLnEE7kHMJJWjVHEEHVhFkPcKINxz0Ar8aIp7DdcIIqAfsl+wbBGiI/Pz+hPqULfTmEEzmHcCLnEE7i1hyjEd8X4xt9OYQTOYdwIucQTuQcwomcQziRcwgncg7hRM4hnMg5hJP/ACZzb00BuFjAAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Image Type': , 'Image Shape': torch.Size([1, 28, 28]), 'Label Type': , 'Label Value': 2}\n" - ] - } - ], - "source": [ - "transform = transforms.Compose(\n", - " [\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.5,), (0.5,)),\n", - " ]\n", - ")\n", - "train_dataset = datasets.FashionMNIST(root=\"./dataset\", train=True, transform=transform, download=True)\n", - "eval_size = min(int(len(train_dataset) * 0.1), 1000)\n", - "train_dataset, eval_dataset = random_split(train_dataset, [len(train_dataset) - eval_size, eval_size])\n", - "test_dataset = datasets.FashionMNIST(root=\"./dataset\", train=False, transform=transform, download=True)\n", - "print('Train Dataset Size:', len(train_dataset))\n", - "print('Valid Dataset Size:', len(eval_dataset))\n", - "print('Test Dataset Size:', len(test_dataset))\n", - "\n", - "image, label = train_dataset[0]\n", - "sample = {\n", - " 'Image Type': type(image),\n", - " 'Image Shape': image.shape,\n", - " 'Label Type': type(label),\n", - " 'Label Value': label\n", - "}\n", - "print('A Train Sample:\\n')\n", - "image = image * 0.5 + 0.5 # 将图像从 [-1, 1] 还原到 [0, 1] 以便更好地可视化\n", - "plt.figure(figsize=(1.5, 1.5))\n", - "plt.imshow(image.squeeze(), cmap='gray')\n", - "plt.title(f\"Label: {label}\")\n", - "plt.axis('off')\n", - "plt.show()\n", - "print(sample)\n", - "\n", - "num_classes = 10" - ] - }, - { - "cell_type": "markdown", - "id": "24594cbc-18b2-47eb-a526-c2ab37facf63", - "metadata": {}, - "source": [ - "开始训练。" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "d816dae1-5fbe-4c29-9597-19d66b5eb6b4", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2cd0298a4a254c018e497a76ccfd0246", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1160 [00:00" - ] - }, - "metadata": {}, - 
"output_type": "display_data" - } - ], - "source": [ - "hyper_params = {\n", - " 'learning_rate': 2.0e-1,\n", - " 'num_epochs': 20,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "model = Model_3_1(num_classes).to(device)\n", - "\n", - "trainer = SoftmaxTrainer(\n", - " model=model, \n", - " train_dataset=train_dataset, eval_dataset=eval_dataset, test_dataset=test_dataset, \n", - " optimizer='manual', criterion='manual', **hyper_params\n", - ")\n", - "trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "a49d0165-aeb7-48c0-9b67-956bb08cb356", - "metadata": {}, - "source": [ - "模型正常收敛。" - ] - }, - { - "cell_type": "markdown", - "id": "3ef5240f-8a11-4678-bfce-f1cbc7e71b77", - "metadata": {}, - "source": [ - "## 问题2\n", - "\n", - "**利用torch.nn实现softmax回归在Fashion-MNIST数据集上进行训练和测试,并从loss,训练集以及测试集上的准确率等多个角度对结果进行分析**" - ] - }, - { - "cell_type": "markdown", - "id": "5c4a88c6-637e-4af5-bed5-f644685dcabc", - "metadata": {}, - "source": [ - "使用torch.nn实现模型。\n", - "\n", - "将之前的Model_3_1中的手动实现函数改为torch.nn内置函数即可,再加上继承nn.Module以使用torch.nn内置模型模板特性。" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "0163b9f7-1019-429c-8c29-06436d0a4c98", - "metadata": {}, - "outputs": [], - "source": [ - "class Model_3_2(nn.Module):\n", - " def __init__(self, num_classes):\n", - " super(Model_3_2, self).__init__()\n", - " self.flatten = nn.Flatten()\n", - " self.linear = nn.Linear(28 * 28, num_classes)\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " x = self.flatten(x)\n", - " x = self.linear(x)\n", - " return x" - ] - }, - { - "cell_type": "markdown", - "id": "6e765ad7-c1c6-4166-bd7f-361666bd4016", - "metadata": {}, - "source": [ - "训练与测试过程与之前手动实现的几乎一致。" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "6d241c05-b153-4f56-a845-0f2362f6459b", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "36bd868142c14b278e0c64868d513a84", - "version_major": 2, - "version_minor": 0 - 
}, - "text/plain": [ - " 0%| | 0/1160 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "hyper_params = {\n", - " 'learning_rate': 2.0e-2,\n", - " 'num_epochs': 20,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "model = Model_3_2(num_classes).to(device)\n", - "\n", - "trainer = SoftmaxTrainer(\n", - " model=model, \n", - " train_dataset=train_dataset, eval_dataset=eval_dataset, test_dataset=test_dataset, \n", - " optimizer='manual', criterion='manual', **hyper_params\n", - ")\n", - "trainer.train()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Lab2/.ipynb_checkpoints/前馈神经网络实验-checkpoint.ipynb b/Lab2/.ipynb_checkpoints/前馈神经网络实验-checkpoint.ipynb deleted file mode 100644 index 4bb7e40..0000000 --- a/Lab2/.ipynb_checkpoints/前馈神经网络实验-checkpoint.ipynb +++ /dev/null @@ -1,3881 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "

研究生《深度学习》课程
实验报告

\n", - "
\n", - "
课程名称:深度学习 M502019B
\n", - "
实验题目:Pytorch基本操作实验
\n", - "
学号:25120323
\n", - "
姓名:柯劲帆
\n", - "
授课老师:原继东
\n", - "
报告日期:2025年8月4日
\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Pytorch version: 2.7.1+cu118\n", - "CUDA version: 11.8\n", - "CUDA device count: 1\n", - "CUDA device name: NVIDIA TITAN Xp\n", - "CUDA device capability: (6, 1)\n", - "CUDA device memory: 11.90 GB\n", - "CPU count: 4\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import torch\n", - "from torch.autograd import Variable\n", - "from torch.utils.data import Dataset, DataLoader, Subset, random_split\n", - "from torch import nn\n", - "from torchvision import datasets, transforms\n", - "from multiprocessing import cpu_count\n", - "from sklearn.model_selection import KFold\n", - "import matplotlib.pyplot as plt\n", - "from tqdm.notebook import tqdm\n", - "import pandas as pd\n", - "from typing import Literal, Union, Optional\n", - "\n", - "print('Pytorch version:',torch.__version__)\n", - "if not torch.cuda.is_available():\n", - " print('CUDA is_available:', torch.cuda.is_available())\n", - "else:\n", - " print('CUDA version:', torch.version.cuda)\n", - " print('CUDA device count:', torch.cuda.device_count())\n", - " print('CUDA device name:', torch.cuda.get_device_name())\n", - " print('CUDA device capability:', torch.cuda.get_device_capability())\n", - " print('CUDA device memory:', f'{torch.cuda.get_device_properties(0).total_memory/1024/1024/1024:.2f}', 'GB')\n", - "print('CPU count:', cpu_count())\n", - "\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - "seed = 42\n", - "np.random.seed(seed)\n", - "torch.manual_seed(seed)\n", - "torch.cuda.manual_seed(seed)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 任务一\n", - "**手动实现前馈神经网络解决上述回归、二分类、多分类任务。**\n", - "- 从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - 
"torch.cuda.empty_cache()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "首先生成数据集。\n", - "\n", - "一共有3个数据集:\n", - "\n", - "1. 回归任务数据集。\n", - " - 生成单个数据集。\n", - " - 数据集的大小为$10000$且训练集大小为$7000$,测试集大小为$3000$。\n", - " - 数据集的样本特征维度$p$为$500$,且服从如下的高维线性函数:$y = 0.028 + \\sum_{i=1}^{p}0.0056 x_i + \\epsilon $。\n", - "2. 二分类任务数据集。\n", - " - 共生成两个数据集。\n", - " - 两个数据集的大小均为$10000$且训练集大小为$7000$,测试集大小为$3000$。\n", - " - 两个数据集的样本特征$x$的维度均为$200$,且分别服从均值互为相反数且方差相同的正态分布。\n", - " - 两个数据集的样本标签分别为$0$和$1$。\n", - "3. MNIST手写体数据集。\n", - " - 该数据集包含$60,000$个用于训练的图像样本和$10,000$个用于测试的图像样本。\n", - " - 图像是固定大小($28\\times 28$像素),其标签为$0$到$10$。 " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "实现回归任务数据集。" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "训练数据集大小:7000,测试数据集大小:3000\n", - "训练数据集的第1对数据:\n", - "输入x[0]第1个特征维度数据x[0][0]:-0.25091975927352905\n", - "标签y[0]:tensor([-1.0123])\n" - ] - } - ], - "source": [ - "class My_Regression_Dataset(Dataset):\n", - " def __init__(self, train=True, num_features=500):\n", - " data_size = (7000 if train else 3000)\n", - " x = np.random.uniform(low=-1, high=1, size=(data_size, num_features))\n", - " noise = np.random.normal(loc=0, scale=1, size=(data_size, 1))\n", - " y = 0.028 - 0.0056 * x.sum(axis=1, keepdims=True) + noise\n", - " self.inputs = torch.tensor(x, dtype=torch.float32)\n", - " self.labels = torch.tensor(y, dtype=torch.float32)\n", - "\n", - " def __len__(self):\n", - " return self.inputs.shape[0]\n", - "\n", - " def __getitem__(self, index):\n", - " return self.inputs[index], self.labels[index]\n", - "\n", - " \n", - "# 测试,并后面的训练创建变量\n", - "train_regression_dataset = My_Regression_Dataset(train=True)\n", - "test_regression_dataset = My_Regression_Dataset(train=False)\n", - "print(\n", - " f\"训练数据集大小:{len(train_regression_dataset)},\"\n", - " 
f\"测试数据集大小:{len(test_regression_dataset)}\"\n", - ")\n", - "x0, y0 = train_regression_dataset[0]\n", - "print(f\"训练数据集的第1对数据:\")\n", - "print(f\"输入x[0]第1个特征维度数据x[0][0]:{x0[0]}\")\n", - "print(f\"标签y[0]:{y0}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "实现二分类任务数据集。" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "训练数据集大小:14000,测试数据集大小:6000\n", - "训练数据集的第1对数据:\n", - "x[0]第1个特征维度数据x[0][0] = -0.4228987991809845\n", - "y[0] = tensor([0.])\n", - "训练数据集的第7001对数据:\n", - "x[7000]第1个特征维度数据x[7000][0] = 0.4295016825199127\n", - "y[7000] = tensor([1.])\n" - ] - } - ], - "source": [ - "class My_BinaryCLS_Dataset(Dataset):\n", - " def __init__(self, train=True, num_features=200):\n", - " num_samples = (7000 if train else 3000)\n", - " x_1 = np.random.normal(loc=-0.5, scale=0.2, size=(num_samples, num_features))\n", - " x_2 = np.random.normal(loc=0.5, scale=0.2, size=(num_samples, num_features))\n", - " labels_1, labels_2 = np.zeros((num_samples, 1)), np.ones((num_samples, 1))\n", - " self.inputs = torch.tensor(np.concatenate((x_1, x_2), axis=0), dtype=torch.float32)\n", - " self.labels = torch.tensor(np.concatenate((labels_1, labels_2), axis=0), dtype=torch.float32)\n", - "\n", - " def __len__(self):\n", - " return self.inputs.shape[0]\n", - "\n", - " def __getitem__(self, index):\n", - " return self.inputs[index], self.labels[index]\n", - "\n", - "\n", - "# 测试,并后面的训练创建变量\n", - "train_binarycls_dataset = My_BinaryCLS_Dataset(train=True)\n", - "test_binarycls_dataset = My_BinaryCLS_Dataset(train=False)\n", - "\n", - "print(\n", - " f\"训练数据集大小:{len(train_binarycls_dataset)},\"\n", - " f\"测试数据集大小:{len(test_binarycls_dataset)}\"\n", - ")\n", - "x0, y0 = train_binarycls_dataset[0]\n", - "print(f\"训练数据集的第1对数据:\")\n", - "print(f\"x[0]第1个特征维度数据x[0][0] = {x0[0]}\")\n", - "print(f\"y[0] = {y0}\")\n", - "\n", - "x7000, y7000 = 
train_binarycls_dataset[7000]\n", - "print(f\"训练数据集的第7001对数据:\")\n", - "print(f\"x[7000]第1个特征维度数据x[7000][0] = {x7000[0]}\")\n", - "print(f\"y[7000] = {y7000}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "使用MNIST数据集。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "调用`torchvision.datasets.MNIST()`,获取数据集。" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "训练数据集大小:60000,测试数据集大小:10000\n", - "A Train Sample:\n", - "\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAIcAAACdCAYAAACeqmv3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAACYZJREFUeJzt3WtIFG8bBvB73zTXzIzKA4htmGZZykoHQxS1AxYaaAlhSEUlEQoSZCcolUgLsyIUC6KDJBWIlRlURFpEsSYdoIOdSEgJ28y0Mg31+X94cfGZ8XbXVt3dvH7gh2ua2XmUq/HZcWdGI4QQBDCA/9l6AGC/UA5goRzAQjmAhXIAC+UAFsoBLJQDWCgHsP7pcjQ0NJBGo6EjR44M22vW1NSQRqOhmpqaYXtNe2V35Th37hxpNBqqq6uz9VBGRE5ODmk0GtWXVqu19dBUnGw9gLGqpKSEJk6caMrjxo2z4WgGhnLYSHJyMk2bNs3WwxiU3f1ascSfP39o//79NH/+fPLw8CA3NzeKioqi6upqdptjx46RTqcjV1dXio6OphcvXqjWqa+vp+TkZJoyZQpptVpasGABVVZWmh1PR0cH1dfX09evXy3+HoQQ1N7eTvb8R3GHLEd7ezudPn2aYmJi6PDhw5STk0NGo5Hi4uLo2bNnqvVLS0vpxIkTlJ6eTnv27KEXL17QkiVLqLm52bTOy5cvafHixfT69WvavXs3FRYWkpubGyUmJtKVK1cGHU9tbS3NmTOHioqKLP4e/P39ycPDg9zd3Sk1NVUai90Qdubs2bOCiMTjx4/Zdbq7u0VXV5e0rLW1VXh7e4tNmzaZln38+FEQkXB1dRWNjY2m5QaDQRCR2L59u2nZ0qVLRUhIiOjs7DQt6+3tFRERESIwMNC0rLq6WhCRqK6uVi3Lzs42+/0dP35cZGRkiLKyMlFeXi4yMzOFk5OTCAwMFG1tbWa3H00OWY7+enp6REtLizAajSI+Pl7o9XrTv/WVIyUlRbVdeHi4CAoKEkII0dLSIjQajThw4IAwGo3SV25uriAiU7kGKoe1ysrKBBGJ/Pz8YXvN4eCQv1aIiM6fP0+hoaGk1Wpp6tSp5OnpSTdu3KC2tjbVuoGBgapls2bNooaGBiIiev/+PQkhaN++feTp6Sl9ZWdnExHRly9fRux7WbduHfn4+NCdO3dGbB9/wyHfrVy4cIE2btxIiYmJlJWVRV5eXjRu3DjKz8+nDx8+DPn1ent7iYhox44dFBcXN+A6AQEBVo3ZHD8/P/r27duI7mOoHLIc5eXl5O/vTxUVFaTRaEzL+/6XK71790617O3btzRjxgwi+v/kkIjI2dmZli1bNvwDNkMIQQ0NDRQWFjbq
+x6MQ/5a6TthJPq9DTQYDPTo0aMB17969So1NTWZcm1tLRkMBlq5ciUREXl5eVFMTAydOnWKPn/+rNreaDQOOp6hvJUd6LVKSkrIaDTSihUrzG4/muz2yHHmzBm6efOmanlmZiYlJCRQRUUFJSUlUXx8PH38+JFOnjxJwcHB9PPnT9U2AQEBFBkZSdu2baOuri46fvw4TZ06lXbu3Glap7i4mCIjIykkJITS0tLI39+fmpub6dGjR9TY2EjPnz9nx1pbW0uxsbGUnZ1NOTk5g35fOp2O1q5dSyEhIaTVaunBgwd06dIl0uv1tHXrVst/QKPBxhNilb53K9zXp0+fRG9vr8jLyxM6nU64uLiIsLAwUVVVJTZs2CB0Op3ptfrerRQUFIjCwkLh5+cnXFxcRFRUlHj+/Llq3x8+fBDr168XPj4+wtnZWfj6+oqEhARRXl5uWsfat7JbtmwRwcHBwt3dXTg7O4uAgACxa9cu0d7ebs2PbURohLDjU3RgUw4554DRgXIAC+UAFsoBLJQDWCgHsFAOYFl8hrT/3zDA8VlyegtHDmChHMBCOYCFcgAL5QAWygEslANYKAewUA5goRzAQjmAhXIAC+UAFsoBLJQDWCgHsFAOYKEcwEI5gGW3V9mPFOX9Pj08PIa0fUZGhpQnTJigWicoKEjK6enpUlbeUTklJUXKnZ2dUj506JCUc3NzLRuslXDkABbKASyUA1gONeeYPn26lMePHy/liIgI1TaRkZFSnjx5spTXrFkzPIPrp7GxUconTpyQclJSkpR//PghZeVdhO7duzeMo7McjhzAQjmAhXIAy+J7gtniWlm9Xi/lu3fvSnmo5yhGQt8NbvvbtGmTlAe6w2F/yttbtra2SvnNmzd/OToerpUFq6AcwEI5gGXXc44pU6ZI2WAwSLnvnuXDSbmP79+/Szk2NlbKf/78Ub2GPcyFzMGcA6yCcgAL5QCWXf9tRflwmqysLCknJCRI+enTp6rXUP5dQ0n5wMDly5dL+devX1KeO3eulDMzMwd9fUeGIwewUA5goRzAsuvzHOZMmjRJysrPRRARnTp1SsqbN2+WcmpqqpQvXrw4TKOzbzjPAVZBOYCFcgAL5QCWXZ8EM6e9vd3sOgM9vry/tLQ0KV++fFnKA32YZ6zAkQNYKAewUA5gOfRJMEu4ublJ+fr161KOjo6Wct/z7fvcvn17ZAZmYzgJBlZBOYCFcgDrn59zKM2cOVPKT548kbLyA8XV1dVSrqurk3JxcbFqHxb+SG0Kcw6wCsoBLJQDWGNuzqGkvJHK2bNnpezu7j7o9nv37lUtKy0tlbLyQml7gDkHWAXlABbKAawxP+dQmjdvnpSPHj0q5aVLl5p9DeWHmg8ePCjlpqamvxzd8MGcA6yCcgAL5QAW5hxmKG9qu2rVKikrz4sQqX9WyhvdKS/WtgXMOcAqKAewUA5gYc5hpa6uLtUyJyf5cqDu7m4px8XFSbmmpmbYx2UO5hxgFZQDWCgHsFAOYDn0hdQjITQ0VMrJyclSXrhwoZSVk8+BvHr1Ssr379//y9GNLhw5gIVyAAvlANaYm3MonxatfML06tWrpezj4zPkffT09EhZ+QFjR7khDI4cwEI5gIVyAOufmnMMND9ISUmRsnKOMWPGDKv2qbywmkj9geLKykqr9mErOHIAC+UAFsoBLIeac3h7e0s5ODhYykVFRaptZs+ebdU+lU+LLCgokPK1a9dU2zjKeQxzcOQAFsoBLJQDWHY151A+gVp5QbJer5fycDyR+uHDh1IuLCyU8q1bt6T8+/dvq/fpKHDkABbKASyUA1ijOucIDw+XsvIJ04sWLZKyr6+v1fvs6OiQsvIJ1Xl5eVJWPoF6LMORA1goB7BQDmCN6pxDeUNYZTZHef1HVVWVlJUXLBOpz1sob3wPPBw5gIVyAAvlABbKASzc2WeMwp19wCooB7BQDmChHMBCOYCFcgAL5QAWygEslANYKAewUA5gWfxh
Hwv/BAP/EBw5gIVyAAvlABbKASyUA1goB7BQDmChHMBCOYD1H8F52RZgXt0eAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Image Type': , 'Image Shape': torch.Size([1, 28, 28]), 'Label Type': , 'Label Value': 5}\n" - ] - } - ], - "source": [ - "transform = transforms.Compose(\n", - " [\n", - " transforms.ToTensor(),\n", - " transforms.Normalize((0.5,), (0.5,)),\n", - " ]\n", - ")\n", - "\n", - "train_mnist_dataset = datasets.MNIST(root=\"dataset\", train=True, transform=transform, download=True)\n", - "test_mnist_dataset = datasets.MNIST(root=\"dataset\", train=False, transform=transform, download=True)\n", - "print(\n", - " f\"训练数据集大小:{len(train_mnist_dataset)},\"\n", - " f\"测试数据集大小:{len(test_mnist_dataset)}\"\n", - ")\n", - "\n", - "image, label = train_mnist_dataset[0]\n", - "sample = {\n", - " 'Image Type': type(image),\n", - " 'Image Shape': image.shape,\n", - " 'Label Type': type(label),\n", - " 'Label Value': label\n", - "}\n", - "print('A Train Sample:\\n')\n", - "plt.figure(figsize=(1.5, 1.5))\n", - "plt.imshow(image.squeeze(0), cmap='gray')\n", - "plt.title(f\"Label: {label}\")\n", - "plt.axis('off')\n", - "plt.show()\n", - "print(sample)\n", - "\n", - "image_width, image_height = 28, 28\n", - "num_classes = 10" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "接下来手动实现前馈神经网络并训练。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "首先手动实现一些工具和基本模型层。这些工具都在前一个实验中实现并测试过,在此就不再分析其原理和具体实现步骤,也不在此重新测试。" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# 手动实现torch.nn.functional.softmax\n", - "def my_softmax(x: torch.Tensor, dim: int):\n", - " max_x = torch.max(x, dim=dim, keepdim=True).values\n", - " exp_x = torch.exp(x - max_x)\n", - " return exp_x / torch.sum(exp_x, dim=dim, keepdim=True)\n", - "\n", - "\n", - "# 手动实现torch.nn.Linear\n", - "class My_Linear:\n", - " def __init__(self, in_features: int, out_features: int):\n", - " self.weight = 
torch.randn((out_features, in_features), requires_grad=True, dtype=torch.float32)\n", - " self.bias = torch.zeros(1, requires_grad=True, dtype=torch.float32)\n", - " self.params = [self.weight, self.bias]\n", - "\n", - " def __call__(self, x: torch.Tensor):\n", - " return self.forward(x)\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " x = torch.matmul(x, self.weight.T) + self.bias\n", - " return x\n", - "\n", - " def to(self, device: str):\n", - " for param in self.params:\n", - " param.data = param.data.to(device=device)\n", - " return self\n", - "\n", - " def parameters(self):\n", - " return self.params\n", - "\n", - " \n", - "# 手动实现torch.nn.Flatten\n", - "class My_Flatten:\n", - " def __call__(self, x: torch.Tensor):\n", - " x = x.view(x.shape[0], -1)\n", - " return x\n", - "\n", - " \n", - "# 手动实现torch.nn.ReLU\n", - "class My_ReLU():\n", - " def __call__(self, x: torch.Tensor):\n", - " return torch.where(x > 0, x, torch.zeros_like(x))\n", - "\n", - "\n", - "# 手动实现torch.nn.LeakyReLU\n", - "class My_LeakyReLU():\n", - " def __init__(self, negative_slope=0.01):\n", - " self.negative_slope = negative_slope\n", - " \n", - " def __call__(self, x: torch.Tensor):\n", - " return torch.where(x > 0, x, x * self.negative_slope)\n", - "\n", - "\n", - "# 手动实现torch.nn.Sigmoid\n", - "class My_Sigmoid():\n", - " def __call__(self, x: torch.Tensor):\n", - " z = torch.exp(-x.abs())\n", - " return torch.where(x >= 0, 1 / (1 + z), z / (1 + z))\n", - "\n", - "\n", - "# 手动实现torch.nn.Softmax\n", - "class My_Softmax:\n", - " def __init__(self, dim: int):\n", - " self.dim = dim\n", - " def __call__(self, x: torch.Tensor):\n", - " max_x = torch.max(x, dim=self.dim, keepdim=True).values\n", - " exp_x = torch.exp(x - max_x)\n", - " return exp_x / torch.sum(exp_x, dim=self.dim, keepdim=True)\n", - "\n", - "\n", - "# 手动实现torch.nn.MSELoss\n", - "class My_MSELoss: \n", - " def __call__(self, prediction: torch.Tensor, target: torch.Tensor):\n", - " loss = 
torch.mean(torch.square(prediction - target))\n", - " return loss\n", - "\n", - "\n", - "# 手动实现torch.nn.BCELoss\n", - "class My_BCELoss:\n", - " def __call__(self, prediction: torch.Tensor, target: torch.Tensor):\n", - " eps = 1e-9\n", - " loss = -torch.mean(target * torch.log(prediction + eps) + (1 - target) * torch.log(1 - prediction + eps))\n", - " return loss\n", - "\n", - "\n", - "# 手动实现torch.nn.CrossEntropyLoss\n", - "class My_CrossEntropyLoss:\n", - " def __call__(\n", - " self, \n", - " x: torch.Tensor, \n", - " targets: torch.Tensor, \n", - " reduction: Literal[\"mean\", \"sum\"] = \"mean\"\n", - " ):\n", - " max_x = torch.max(x, dim=1, keepdim=True).values\n", - " exp_x = torch.exp(x - max_x)\n", - " log_probs = x - max_x - torch.log(torch.sum(exp_x, dim=1, keepdim=True))\n", - " \n", - " if len(x.shape) == len(targets.shape) + 1:\n", - " nll_loss = -log_probs.gather(1, targets.unsqueeze(-1)).squeeze()\n", - " else:\n", - " nll_loss = -torch.sum(targets * log_probs, dim=1)\n", - " \n", - " if reduction == \"mean\": \n", - " return torch.mean(nll_loss)\n", - " else: \n", - " return torch.sum(nll_loss)\n", - "\n", - "\n", - "# 手动实现torch.optim.SGD\n", - "class My_Optimizer:\n", - " def __init__(self, params: list[torch.Tensor], lr: float):\n", - " self.params = list(params)\n", - " self.lr = lr\n", - "\n", - " def step(self):\n", - " for param in self.params:\n", - " if param.grad is not None:\n", - " param.data = param.data - self.lr * param.grad.data\n", - "\n", - " def zero_grad(self):\n", - " for param in self.params:\n", - " if param.grad is not None:\n", - " param.grad.data.zero_()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "接下来实现Regression回归的Trainer,包括训练流程、测试和画图。\n", - "\n", - "训练Regression回归模型,进行如下步骤:\n", - "1. 定义模型、数据集、损失函数、优化器和其他超参数\n", - "2. 训练\n", - " 1. 从训练dataloader中获取批量数据\n", - " 2. 传入模型\n", - " 3. 使用损失函数计算与ground_truth的损失\n", - " 4. 使用优化器进行反向传播\n", - " 5. 循环以上步骤\n", - "3. 验证及测试\n", - " 1. 
class RegressionTrainer():
    """Mini-batch trainer for regression models.

    Runs the optimisation loop, evaluates on the validation data after every
    epoch, optionally evaluates on a test set once training has finished, and
    can plot the resulting loss / error curves.

    Args:
        model: Callable model exposing ``parameters()``; invoked as ``model(x)``.
        train_dataset: Training data. A ``Dataset`` is wrapped in a shuffling
            ``DataLoader``; a ready-made ``DataLoader`` is used unchanged.
        eval_dataset: Validation data, same convention as ``train_dataset``
            (loaders built here do not shuffle).
        optimizer: ``'torch'`` selects ``torch.optim.SGD``; any other value
            selects the notebook's ``My_Optimizer``.
        criterion: ``'torch'`` selects ``nn.MSELoss``; any other value selects
            the notebook's ``My_MSELoss``.
        learning_rate: Learning rate handed to the optimizer.
        num_epochs: Number of passes over the training loader.
        batch_size: Batch size for loaders built here (not applied to
            ``DataLoader`` inputs).
        test_dataset: Optional test data, same convention as ``eval_dataset``.
        plot: Whether ``train`` draws the curves when it finishes.
        print_test_result: Whether ``train`` prints the test error.
        return_curves: Whether ``train`` includes the recorded curves in its
            return value.
        log_epoch: Print a log line every ``log_epoch`` epochs; values ``<= 0``
            disable logging.

    NOTE(review): this class never calls ``model.train()`` / ``model.eval()``,
    so mode-dependent layers stay in whatever mode the caller left them in.
    """

    def __init__(
        self,
        model,
        train_dataset: Union[Dataset, DataLoader],
        eval_dataset: Union[Dataset, DataLoader],
        optimizer: Literal['torch', 'manual'],
        criterion: Literal['torch', 'manual'],
        learning_rate: float,
        num_epochs: int,
        batch_size: int,
        test_dataset: Union[Dataset, DataLoader] = None,
        plot: bool = True,
        print_test_result: bool = True,
        return_curves: bool = False,
        log_epoch: int = 1
    ):
        self.model = model
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.plot = plot
        self.print_test_result = print_test_result
        self.return_curves = return_curves
        self.log_epoch = log_epoch

        # Only the training data needs shuffling. Evaluation loaders keep a
        # fixed order so the per-batch-averaged loss is reproducible.
        self.train_dataloader = self._as_dataloader(train_dataset, shuffle=True)
        self.eval_dataloader = self._as_dataloader(eval_dataset, shuffle=False)
        self.test_dataloader = self._as_dataloader(test_dataset, shuffle=False)

        if optimizer == 'torch':
            self.optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
        else:
            self.optimizer = My_Optimizer(model.parameters(), lr=learning_rate)

        if criterion == 'torch':
            self.criterion = nn.MSELoss()
        else:
            self.criterion = My_MSELoss()

    def _as_dataloader(self, data, shuffle: bool):
        """Wrap a ``Dataset`` in a ``DataLoader``; pass anything else (loader or ``None``) through."""
        if isinstance(data, Dataset):
            return DataLoader(
                dataset=data, batch_size=self.batch_size, shuffle=shuffle, num_workers=cpu_count()
            )
        return data

    def train(self):
        """Run the full training loop.

        Returns:
            dict: ``'test_error'`` when a test loader is set, and ``'curves'``
            (train loss per step, validation loss / error per epoch) when
            ``return_curves`` is true. Empty otherwise.
        """
        train_loss_curve = []
        eval_loss_curve = []
        eval_error_curve = []
        total_train_steps = self.num_epochs * len(self.train_dataloader)
        with tqdm(total=total_train_steps) as pbar:
            for epoch in range(self.num_epochs):
                total_train_loss = 0
                for x, targets in self.train_dataloader:
                    x = x.to(device=device, dtype=torch.float32)
                    targets = targets.to(device=device, dtype=torch.float32)

                    self.optimizer.zero_grad()
                    output = self.model(x)
                    loss = self.criterion(output, targets)
                    # Read the scalar once; each .item() forces a device sync.
                    batch_loss = loss.item()
                    total_train_loss += batch_loss
                    train_loss_curve.append(batch_loss)

                    loss.backward()
                    self.optimizer.step()
                    pbar.update(1)

                avg_eval_loss, avg_eval_error = self.eval()
                eval_loss_curve.append(avg_eval_loss)
                eval_error_curve.append(avg_eval_error)
                if self.log_epoch > 0 and (epoch + 1) % self.log_epoch == 0:
                    # The validation loss is rescaled by the number of training
                    # batches so it is comparable with the summed training loss.
                    log_info = {
                        'Epoch': f'{epoch + 1}/{self.num_epochs}',
                        'Total Train Loss': f'{total_train_loss:.2f}',
                        'Scaled Total Valid Loss': f'{avg_eval_loss * len(self.train_dataloader):.2f}',
                        'Avg Valid Error': f'{avg_eval_error:.2f}'
                    }
                    print(log_info)

        return_info = {}
        if self.test_dataloader:
            test_error = self.test()
            if self.print_test_result:
                print('Avg Test Error:', f'{test_error:.2f}')
            return_info['test_error'] = test_error
        if self.plot:
            self.plot_results(train_loss_curve, eval_loss_curve, eval_error_curve)
        if self.return_curves:
            curves = {
                'train_loss_curve': train_loss_curve,
                'eval_loss_curve': eval_loss_curve,
                'eval_error_curve': eval_error_curve
            }
            return_info['curves'] = curves
        return return_info

    def eval(self):
        """Evaluate on the validation loader.

        Returns:
            tuple: (criterion loss averaged over batches, squared error
            averaged over all target elements).
        """
        total_eval_loss = 0
        total_eval_error = 0
        total_eval_samples = 0
        with torch.inference_mode():
            for x, targets in self.eval_dataloader:
                x = x.to(device=device, dtype=torch.float32)
                targets = targets.to(device=device, dtype=torch.float32)
                output = self.model(x)
                loss = self.criterion(output, targets)
                total_eval_loss += loss.item()
                total_eval_error += torch.square(output - targets).sum().item()
                total_eval_samples += targets.numel()
        avg_eval_loss = total_eval_loss / len(self.eval_dataloader)
        avg_eval_error = total_eval_error / total_eval_samples
        return avg_eval_loss, avg_eval_error

    def test(self):
        """Return the squared error on the test loader, averaged over all target elements."""
        total_test_error = 0
        total_test_samples = 0
        with torch.inference_mode():
            for x, targets in self.test_dataloader:
                x = x.to(device=device, dtype=torch.float32)
                targets = targets.to(device=device, dtype=torch.float32)
                output = self.model(x)
                total_test_error += torch.square(output - targets).sum().item()
                total_test_samples += targets.numel()
        avg_test_error = total_test_error / total_test_samples
        return avg_test_error

    def plot_results(self, train_loss_curve, eval_loss_curve, eval_error_curve):
        """Plot train/validation loss against the step index and validation error against the epoch index."""
        fig, axes = plt.subplots(1, 2, figsize=(10, 4))

        axes[0].plot(train_loss_curve, label='Training Loss', color='blue')
        # Place each epoch's validation loss at the step where that epoch ended.
        axes[0].plot(
            np.linspace(len(self.train_dataloader), len(train_loss_curve), len(eval_loss_curve), endpoint=True),
            eval_loss_curve, label='Validation Loss', color='orange'
        )
        axes[0].set_xlabel('Step')
        axes[0].set_ylabel('Loss')
        axes[0].set_title('Loss Curve')
        axes[0].legend()
        axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

        axes[1].plot(eval_error_curve, label='Validation Error', color='red', marker='o')
        axes[1].set_xlabel('Epoch')
        axes[1].set_ylabel('Error')
        axes[1].set_title('Validation Error Curve')
        axes[1].legend()
        axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

        plt.tight_layout()
        plt.show()
class BinaryCLSTrainer():
    """Mini-batch trainer for binary classifiers whose output is a probability.

    Runs the optimisation loop, evaluates on the validation data after every
    epoch, optionally evaluates on a test set once training has finished, and
    can plot the loss / accuracy curves. Predictions are obtained by
    thresholding the model output at 0.5.

    Args:
        model: Callable model exposing ``parameters()``; invoked as ``model(x)``.
        train_dataset: Training data. A ``Dataset`` is wrapped in a shuffling
            ``DataLoader``; a ready-made ``DataLoader`` is used unchanged.
        eval_dataset: Validation data, same convention as ``train_dataset``
            (loaders built here do not shuffle).
        optimizer: ``'torch'`` selects ``torch.optim.SGD``; any other value
            selects the notebook's ``My_Optimizer``.
        criterion: ``'torch'`` selects ``nn.BCELoss``; any other value selects
            the notebook's ``My_BCELoss``.
        learning_rate: Learning rate handed to the optimizer.
        num_epochs: Number of passes over the training loader.
        batch_size: Batch size for loaders built here (not applied to
            ``DataLoader`` inputs).
        test_dataset: Optional test data, same convention as ``eval_dataset``.
        plot: Whether ``train`` draws the curves when it finishes.
        print_test_result: Whether ``train`` prints the test accuracy.
        return_curves: Whether ``train`` includes the recorded curves in its
            return value.
        log_epoch: Print a log line every ``log_epoch`` epochs; values ``<= 0``
            disable logging.

    NOTE(review): this class never calls ``model.train()`` / ``model.eval()``,
    so mode-dependent layers stay in whatever mode the caller left them in.
    """

    def __init__(
        self,
        model,
        train_dataset: Union[Dataset, DataLoader],
        eval_dataset: Union[Dataset, DataLoader],
        optimizer: Literal['torch', 'manual'],
        criterion: Literal['torch', 'manual'],
        learning_rate: float,
        num_epochs: int,
        batch_size: int,
        test_dataset: Union[Dataset, DataLoader] = None,
        plot: bool = True,
        print_test_result: bool = True,
        return_curves: bool = False,
        log_epoch: int = 1
    ):
        self.model = model
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.plot = plot
        self.print_test_result = print_test_result
        self.return_curves = return_curves
        self.log_epoch = log_epoch

        # Only the training data needs shuffling. Evaluation loaders keep a
        # fixed order so the per-batch-averaged loss is reproducible.
        self.train_dataloader = self._as_dataloader(train_dataset, shuffle=True)
        self.eval_dataloader = self._as_dataloader(eval_dataset, shuffle=False)
        self.test_dataloader = self._as_dataloader(test_dataset, shuffle=False)

        if optimizer == 'torch':
            self.optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
        else:
            self.optimizer = My_Optimizer(model.parameters(), lr=learning_rate)

        if criterion == 'torch':
            self.criterion = nn.BCELoss()
        else:
            self.criterion = My_BCELoss()

    def _as_dataloader(self, data, shuffle: bool):
        """Wrap a ``Dataset`` in a ``DataLoader``; pass anything else (loader or ``None``) through."""
        if isinstance(data, Dataset):
            return DataLoader(
                dataset=data, batch_size=self.batch_size, shuffle=shuffle, num_workers=cpu_count()
            )
        return data

    def train(self):
        """Run the full training loop.

        Returns:
            dict: ``'test_acc'`` when a test loader is set, and ``'curves'``
            (train loss per step, validation loss / accuracy per epoch) when
            ``return_curves`` is true. Empty otherwise.
        """
        train_loss_curve = []
        eval_loss_curve = []
        eval_acc_curve = []
        total_train_steps = self.num_epochs * len(self.train_dataloader)
        with tqdm(total=total_train_steps) as pbar:
            for epoch in range(self.num_epochs):
                total_train_loss = 0
                for x, targets in self.train_dataloader:
                    x = x.to(device=device, dtype=torch.float32)
                    targets = targets.to(device=device, dtype=torch.float32)

                    self.optimizer.zero_grad()
                    output = self.model(x)
                    loss = self.criterion(output, targets)
                    # Read the scalar once; each .item() forces a device sync.
                    batch_loss = loss.item()
                    total_train_loss += batch_loss
                    train_loss_curve.append(batch_loss)

                    loss.backward()
                    self.optimizer.step()
                    pbar.update(1)

                avg_eval_loss, avg_eval_acc = self.eval()
                eval_loss_curve.append(avg_eval_loss)
                eval_acc_curve.append(avg_eval_acc)
                if self.log_epoch > 0 and (epoch + 1) % self.log_epoch == 0:
                    # The validation loss is rescaled by the number of training
                    # batches so it is comparable with the summed training loss.
                    log_info = {
                        'Epoch': f'{epoch + 1}/{self.num_epochs}',
                        'Total Train Loss': f'{total_train_loss:.2f}',
                        'Scaled Total Valid Loss': f'{avg_eval_loss * len(self.train_dataloader):.2f}',
                        'Avg Valid Acc': f'{avg_eval_acc:.2%}'
                    }
                    print(log_info)

        return_info = {}
        if self.test_dataloader:
            test_acc = self.test()
            if self.print_test_result:
                print('Avg Test Acc:', f'{test_acc:.2%}')
            return_info['test_acc'] = test_acc
        if self.plot:
            self.plot_results(train_loss_curve, eval_loss_curve, eval_acc_curve)
        if self.return_curves:
            curves = {
                'train_loss_curve': train_loss_curve,
                'eval_loss_curve': eval_loss_curve,
                'eval_acc_curve': eval_acc_curve
            }
            return_info['curves'] = curves
        return return_info

    def eval(self):
        """Evaluate on the validation loader.

        Returns:
            tuple: (criterion loss averaged over batches, fraction of target
            elements whose thresholded prediction matches).
        """
        total_eval_loss = 0
        total_eval_acc = 0
        total_eval_samples = 0
        with torch.inference_mode():
            for x, targets in self.eval_dataloader:
                x = x.to(device=device, dtype=torch.float32)
                # The criterion receives float targets; the comparison below uses integer labels.
                targets = targets.to(device=device, dtype=torch.float32)
                output = self.model(x)
                loss = self.criterion(output, targets)
                total_eval_loss += loss.item()
                preds = (output >= 0.5).float()
                total_eval_acc += (preds == targets.to(dtype=torch.long)).float().sum().item()
                total_eval_samples += targets.numel()
        avg_eval_loss = total_eval_loss / len(self.eval_dataloader)
        avg_eval_acc = total_eval_acc / total_eval_samples
        return avg_eval_loss, avg_eval_acc

    def test(self):
        """Return the accuracy on the test loader, using a 0.5 decision threshold."""
        total_test_acc = 0
        total_test_samples = 0
        with torch.inference_mode():
            for x, targets in self.test_dataloader:
                x = x.to(device=device, dtype=torch.float32)
                targets = targets.to(device=device, dtype=torch.long)
                output = self.model(x)
                preds = (output >= 0.5).float()
                total_test_acc += (preds == targets).float().sum().item()
                total_test_samples += targets.numel()
        avg_test_acc = total_test_acc / total_test_samples
        return avg_test_acc

    def plot_results(self, train_loss_curve, eval_loss_curve, eval_acc_curve):
        """Plot train/validation loss against the step index and validation accuracy against the epoch index."""
        fig, axes = plt.subplots(1, 2, figsize=(10, 4))

        axes[0].plot(train_loss_curve, label='Training Loss', color='blue')
        # Place each epoch's validation loss at the step where that epoch ended.
        axes[0].plot(
            np.linspace(len(self.train_dataloader), len(train_loss_curve), len(eval_loss_curve), endpoint=True),
            eval_loss_curve, label='Validation Loss', color='orange'
        )
        axes[0].set_xlabel('Step')
        axes[0].set_ylabel('Loss')
        axes[0].set_title('Loss Curve')
        axes[0].legend()
        axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

        axes[1].plot(eval_acc_curve, label='Validation Accuracy', color='green', marker='o')
        axes[1].set_xlabel('Epoch')
        axes[1].set_ylabel('Accuracy')
        axes[1].set_title('Validation Accuracy Curve')
        axes[1].legend()
        axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

        plt.tight_layout()
        plt.show()
class MultiCLSTrainer():
    """Mini-batch trainer for multi-class classifiers that output one score per class.

    Runs the optimisation loop, evaluates on the validation data after every
    epoch, optionally evaluates on a test set once training has finished, and
    can plot the loss / accuracy curves. Predictions are the arg-max over
    dimension 1 of the soft-maxed model output.

    Args:
        model: Callable model exposing ``parameters()``; invoked as ``model(x)``.
        train_dataset: Training data. A ``Dataset`` is wrapped in a shuffling
            ``DataLoader``; a ready-made ``DataLoader`` is used unchanged.
        eval_dataset: Validation data, same convention as ``train_dataset``
            (loaders built here do not shuffle).
        optimizer: Either a string (``'torch'`` selects ``torch.optim.SGD``,
            any other string selects the notebook's ``My_Optimizer``) or a
            callable invoked as ``optimizer(params, lr=..., weight_decay=...)``.
        criterion: ``'torch'`` selects ``nn.CrossEntropyLoss`` / ``nn.Softmax``;
            any other value selects the notebook's ``My_CrossEntropyLoss`` /
            ``My_Softmax``.
        learning_rate: Learning rate handed to the optimizer.
        num_epochs: Number of passes over the training loader.
        batch_size: Batch size for loaders built here (not applied to
            ``DataLoader`` inputs).
        weight_decay: Forwarded to ``torch.optim.SGD`` and to callable
            optimizers; not forwarded to ``My_Optimizer``.
        test_dataset: Optional test data, same convention as ``eval_dataset``.
        plot: Whether ``train`` draws the curves when it finishes.
        print_test_result: Whether ``train`` prints the test accuracy.
        return_curves: Whether ``train`` includes the recorded curves in its
            return value.
        log_epoch: Print a log line every ``log_epoch`` epochs; values ``<= 0``
            disable logging.

    NOTE(review): this class never calls ``model.train()`` / ``model.eval()``,
    so mode-dependent layers stay in whatever mode the caller left them in.
    """

    def __init__(
        self,
        model,
        train_dataset: Union[Dataset, DataLoader],
        eval_dataset: Union[Dataset, DataLoader],
        optimizer,
        criterion: Literal['torch', 'manual'],
        learning_rate: float,
        num_epochs: int,
        batch_size: int,
        weight_decay: float = 0.0,
        test_dataset: Union[Dataset, DataLoader] = None,
        plot: bool = True,
        print_test_result: bool = True,
        return_curves: bool = False,
        log_epoch: int = 1
    ):
        self.model = model
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.plot = plot
        self.print_test_result = print_test_result
        self.return_curves = return_curves
        self.log_epoch = log_epoch

        # Only the training data needs shuffling. Evaluation loaders keep a
        # fixed order so the per-batch-averaged loss is reproducible.
        self.train_dataloader = self._as_dataloader(train_dataset, shuffle=True)
        self.eval_dataloader = self._as_dataloader(eval_dataset, shuffle=False)
        self.test_dataloader = self._as_dataloader(test_dataset, shuffle=False)

        if isinstance(optimizer, str):
            if optimizer == 'torch':
                self.optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
            else:
                self.optimizer = My_Optimizer(model.parameters(), lr=learning_rate)
        else:
            # Caller supplied an optimizer class / factory.
            self.optimizer = optimizer(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

        if criterion == 'torch':
            self.criterion = nn.CrossEntropyLoss()
            self.softmax = nn.Softmax(dim=1)
        else:
            self.criterion = My_CrossEntropyLoss()
            self.softmax = My_Softmax(dim=1)

    def _as_dataloader(self, data, shuffle: bool):
        """Wrap a ``Dataset`` in a ``DataLoader``; pass anything else (loader or ``None``) through."""
        if isinstance(data, Dataset):
            return DataLoader(
                dataset=data, batch_size=self.batch_size, shuffle=shuffle, num_workers=cpu_count()
            )
        return data

    def train(self):
        """Run the full training loop.

        Returns:
            dict: ``'test_acc'`` when a test loader is set, and ``'curves'``
            (train loss per step, validation loss / accuracy per epoch) when
            ``return_curves`` is true. Empty otherwise.
        """
        train_loss_curve = []
        eval_loss_curve = []
        eval_acc_curve = []
        total_train_steps = self.num_epochs * len(self.train_dataloader)
        with tqdm(total=total_train_steps) as pbar:
            for epoch in range(self.num_epochs):
                total_train_loss = 0
                for x, targets in self.train_dataloader:
                    x = x.to(device=device, dtype=torch.float32)
                    targets = targets.to(device=device, dtype=torch.long)

                    self.optimizer.zero_grad()
                    output = self.model(x)
                    loss = self.criterion(output, targets)
                    # Read the scalar once; each .item() forces a device sync.
                    batch_loss = loss.item()
                    total_train_loss += batch_loss
                    train_loss_curve.append(batch_loss)

                    loss.backward()
                    self.optimizer.step()
                    pbar.update(1)

                avg_eval_loss, avg_eval_acc = self.eval()
                eval_loss_curve.append(avg_eval_loss)
                eval_acc_curve.append(avg_eval_acc)
                if self.log_epoch > 0 and (epoch + 1) % self.log_epoch == 0:
                    # The validation loss is rescaled by the number of training
                    # batches so it is comparable with the summed training loss.
                    log_info = {
                        'Epoch': f'{epoch + 1}/{self.num_epochs}',
                        'Total Train Loss': f'{total_train_loss:.2f}',
                        'Scaled Total Valid Loss': f'{avg_eval_loss * len(self.train_dataloader):.2f}',
                        'Avg Valid Acc': f'{avg_eval_acc:.2%}'
                    }
                    print(log_info)

        return_info = {}
        if self.test_dataloader:
            test_acc = self.test()
            if self.print_test_result:
                print('Avg Test Acc:', f'{test_acc:.2%}')
            return_info['test_acc'] = test_acc
        if self.plot:
            self.plot_results(train_loss_curve, eval_loss_curve, eval_acc_curve)
        if self.return_curves:
            curves = {
                'train_loss_curve': train_loss_curve,
                'eval_loss_curve': eval_loss_curve,
                'eval_acc_curve': eval_acc_curve
            }
            return_info['curves'] = curves
        return return_info

    def eval(self):
        """Evaluate on the validation loader.

        Returns:
            tuple: (criterion loss averaged over batches, fraction of targets
            whose arg-max prediction matches).
        """
        total_eval_loss = 0
        total_eval_acc = 0
        total_eval_samples = 0
        with torch.inference_mode():
            for x, targets in self.eval_dataloader:
                x = x.to(device=device, dtype=torch.float32)
                targets = targets.to(device=device, dtype=torch.long)
                output = self.model(x)
                loss = self.criterion(output, targets)
                total_eval_loss += loss.item()
                preds = self.softmax(output).argmax(dim=1)
                total_eval_acc += (preds == targets).float().sum().item()
                total_eval_samples += targets.numel()
        avg_eval_loss = total_eval_loss / len(self.eval_dataloader)
        avg_eval_acc = total_eval_acc / total_eval_samples
        return avg_eval_loss, avg_eval_acc

    def test(self):
        """Return the arg-max accuracy on the test loader."""
        total_test_acc = 0
        total_test_samples = 0
        with torch.inference_mode():
            for x, targets in self.test_dataloader:
                x = x.to(device=device, dtype=torch.float32)
                targets = targets.to(device=device, dtype=torch.long)
                output = self.model(x)
                preds = self.softmax(output).argmax(dim=1)
                total_test_acc += (preds == targets).float().sum().item()
                total_test_samples += targets.numel()
        avg_test_acc = total_test_acc / total_test_samples
        return avg_test_acc

    def plot_results(self, train_loss_curve, eval_loss_curve, eval_acc_curve):
        """Plot train/validation loss against the step index and validation accuracy against the epoch index."""
        fig, axes = plt.subplots(1, 2, figsize=(10, 4))

        axes[0].plot(train_loss_curve, label='Training Loss', color='blue')
        # Place each epoch's validation loss at the step where that epoch ended.
        axes[0].plot(
            np.linspace(len(self.train_dataloader), len(train_loss_curve), len(eval_loss_curve), endpoint=True),
            eval_loss_curve, label='Validation Loss', color='orange'
        )
        axes[0].set_xlabel('Step')
        axes[0].set_ylabel('Loss')
        axes[0].set_title('Loss Curve')
        axes[0].legend()
        axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

        axes[1].plot(eval_acc_curve, label='Validation Accuracy', color='green', marker='o')
        axes[1].set_xlabel('Epoch')
        axes[1].set_ylabel('Accuracy')
        axes[1].set_title('Validation Accuracy Curve')
        axes[1].legend()
        axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

        plt.tight_layout()
        plt.show()
class Model_1_3:
    """Hand-built multi-class model: a flatten step followed by a single linear layer.

    Composed from the notebook's ``My_Flatten`` and ``My_Linear`` helpers
    (defined elsewhere). The forward pass returns the linear layer's output
    directly; no activation is applied here.
    """

    def __init__(self, input_features=784, num_classes=10):
        self.flatten = My_Flatten()
        self.linear = My_Linear(in_features=input_features, out_features=num_classes)
        # Reuse the layer's own parameter collection rather than copying it.
        self.params = self.linear.params

    def __call__(self, x: torch.Tensor):
        return self.forward(x)

    def forward(self, x: torch.Tensor):
        """Flatten ``x`` and feed it through the linear layer."""
        return self.linear(self.flatten(x))

    def to(self, device: str):
        """Move every parameter's storage to ``device`` and return ``self`` for chaining."""
        for tensor in self.params:
            tensor.data = tensor.data.to(device=device)
        return self

    def parameters(self):
        """Return the parameter collection consumed by the optimizer."""
        return self.params

    def _set_requires_grad(self, flag):
        # Shared implementation behind train() / eval().
        for tensor in self.params:
            tensor.requires_grad = flag

    def train(self):
        """Enable gradient tracking on all parameters."""
        self._set_requires_grad(True)

    def eval(self):
        """Disable gradient tracking on all parameters."""
        self._set_requires_grad(False)
class Model_2_1(nn.Module):
    """Linear regression model built with ``torch.nn``.

    A single ``nn.Linear`` layer mapping ``input_features`` inputs to one
    unbounded real output. The output is deliberately NOT passed through a
    sigmoid: a sigmoid would confine predictions to (0, 1) and make the model
    unable to fit targets outside that interval, and it would also diverge
    from the hand-written regression model, which is a plain linear layer.

    Args:
        input_features: Number of input features per sample.
    """

    def __init__(self, input_features=500):
        super().__init__()
        self.linear = nn.Linear(in_features=input_features, out_features=1)

    def forward(self, x):
        """Return the raw linear prediction, one value per input row."""
        return self.linear(x)
class Model_2_3(nn.Module):
    """Single-layer multi-class classifier built with ``torch.nn``.

    Flattens each sample and applies one linear layer that yields
    ``num_classes`` raw scores per sample.

    Args:
        input_features: Number of values per sample after flattening.
        num_classes: Number of output scores.
    """

    def __init__(self, input_features=784, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear = nn.Linear(input_features, num_classes)

    def forward(self, x: torch.Tensor):
        """Return the linear layer's scores for the flattened input."""
        flat = self.flatten(x)
        return self.linear(flat)
class Model_3(nn.Module):
    """Three-layer MLP classifier with a selectable hidden activation.

    Layout: flatten -> fc1 (1024 units) -> act -> fc2 (256 units) -> act -> fc3.
    The activation is applied to the two hidden layers only. ``fc3`` returns
    raw scores, because ``nn.CrossEntropyLoss`` expects unnormalised logits;
    pushing them through ReLU / Sigmoid / Tanh would clamp or squash them and
    handicap training.

    Args:
        activate_fn: Name of the hidden activation; one of ``'ReLU'``,
            ``'Sigmoid'``, ``'Tanh'``, ``'LeakyReLU'``.
        input_features: Number of values per sample after flattening.
        num_classes: Number of output scores.

    Raises:
        ValueError: If ``activate_fn`` is not one of the supported names.
    """

    def __init__(
        self,
        activate_fn: Literal['ReLU', 'Sigmoid', 'Tanh', 'LeakyReLU'],
        input_features: int = 784,
        num_classes: int = 10
    ):
        super().__init__()
        activations = {
            'ReLU': nn.ReLU,
            'Sigmoid': nn.Sigmoid,
            'Tanh': nn.Tanh,
            'LeakyReLU': nn.LeakyReLU,
        }
        # Fail at construction time instead of with an AttributeError in forward().
        if activate_fn not in activations:
            raise ValueError(
                f"Unsupported activate_fn {activate_fn!r}; expected one of {sorted(activations)}"
            )

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=input_features, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=num_classes)
        self.activate_fn = activations[activate_fn]()

    def forward(self, x: torch.Tensor):
        """Return raw class scores for ``x``."""
        x = self.flatten(x)
        x = self.activate_fn(self.fc1(x))
        x = self.activate_fn(self.fc2(x))
        # No activation on the output layer: the loss consumes raw logits.
        return self.fc3(x)
trainer.train()['curves']\n", - "\n", - " axes[0].plot(\n", - " range(1, len(curves['eval_loss_curve']) + 1), curves['eval_loss_curve'], \n", - " label=fn_name, color=plot_colors[fn_name]\n", - " )\n", - " axes[1].plot(\n", - " range(1, len(curves['eval_acc_curve']) + 1), curves['eval_acc_curve'], \n", - " label=fn_name, color=plot_colors[fn_name]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "在性能表现上,激活函数为`Sigmoid`的模型训练过程中损失下降速度非常慢,可见发生了梯度消失,这验证了`Sigmoid`非常容易出现梯度消失的问题。\n", - "\n", - "激活函数为`ReLU`的模型比较不稳定,有时会出现神经元死亡过多(值为$0$或接近$0$)的情况。\n", - "\n", - "`Tanh`以及`LeakyReLU`的表现相对优秀。\n", - "\n", - "在用时上,各激活函数的模型训练用时相近。" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 任务四\n", - "**对多分类任务中的模型评估隐藏层层数和隐藏单元个数对实验结果的影响。**\n", - "- 使用不同的隐藏层层数和隐藏单元个数,进行对比实验并分析实验结果\n" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "接下来定义6个模型,隐藏层层数和隐藏单元个数分别如下:\n", - "\n", - "| 模型编号 | hidden_layer (层数) | hidden_size (隐藏单元个数) |\n", - "|:--------:|:-------------------:|:--------------------------:|\n", - "| 1 | 2 | 64 |\n", - "| 2 | 2 | 1024 |\n", - "| 3 | 4 | 64 |\n", - "| 4 | 4 | 1024 |\n", - "| 5 | 8 | 64 |\n", - "| 6 | 8 | 1024 |\n", - "\n", - "\n", - "\n", - "分别训练和测试。并将损失曲线和正确率曲线分别画在一个图内以进行比较6个模型的效果。" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(隐藏层层数为2,隐藏单元个数为64)开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "137bb123d4664678b57c3a4c2e7ea977", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/590 [00:00" - ] - }, - "metadata": {}, - "output_type": 
"display_data" - } - ], - "source": [ - "class Model_4(nn.Module):\n", - " def __init__(\n", - " self, \n", - " hidden_size: int,\n", - " num_hidden_layer: int,\n", - " input_features: int = 784, \n", - " num_classes: int = 10\n", - " ):\n", - " super().__init__()\n", - " self.net = nn.Sequential(\n", - " nn.Flatten(),\n", - " nn.Linear(in_features=input_features, out_features=hidden_size),\n", - " nn.LeakyReLU()\n", - " )\n", - " for i in range(num_hidden_layer - 1):\n", - " self.net.append(nn.Linear(in_features=hidden_size, out_features=hidden_size))\n", - " self.net.append(nn.LeakyReLU())\n", - " self.net.append(nn.Linear(in_features=hidden_size, out_features=num_classes))\n", - " \n", - " def forward(self, x: torch.Tensor):\n", - " return self.net(x)\n", - " \n", - "model_arch_params = [\n", - " {'num_hidden_layer': 2, 'hidden_size': 64},\n", - " {'num_hidden_layer': 2, 'hidden_size': 1024},\n", - " {'num_hidden_layer': 4, 'hidden_size': 64},\n", - " {'num_hidden_layer': 4, 'hidden_size': 1024},\n", - " {'num_hidden_layer': 8, 'hidden_size': 64},\n", - " {'num_hidden_layer': 8, 'hidden_size': 1024},\n", - "]\n", - "plot_colors = ['blue', 'blue', 'green', 'green', 'orange', 'orange']\n", - "plot_linestyles = ['solid', 'dashed', 'solid', 'dashed', 'solid', 'dashed']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Epoch')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Epoch')\n", - "axes[1].set_ylabel('Accuracy')\n", - "axes[1].set_title('Validation Accuracy Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "hyper_params = {\n", - " 'learning_rate': 6.0e-2,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "for index, arch_param in enumerate(model_arch_params):\n", - " num_hidden_layer, hidden_size = 
arch_param['num_hidden_layer'], arch_param['hidden_size']\n", - " model = Model_4(**arch_param, input_features=image_width * image_height, num_classes=num_classes).to(device)\n", - " \n", - " print(f\"模型{index + 1}(隐藏层层数为{num_hidden_layer},隐藏单元个数为{hidden_size})开始训练:\")\n", - " trainer = MultiCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_mnist_dataset, eval_dataset=test_mnist_dataset, \n", - " optimizer='torch', criterion='torch', **hyper_params, \n", - " plot=False, return_curves=True, log_epoch=10\n", - " )\n", - " curves = trainer.train()['curves']\n", - "\n", - " axes[0].plot(\n", - " range(1, len(curves['eval_loss_curve']) + 1), curves['eval_loss_curve'], color=plot_colors[index],\n", - " label=f\"({num_hidden_layer}, {hidden_size})\", linestyle=plot_linestyles[index]\n", - " )\n", - " axes[1].plot(\n", - " range(1, len(curves['eval_acc_curve']) + 1), curves['eval_acc_curve'], color=plot_colors[index], \n", - " label=f\"({num_hidden_layer}, {hidden_size})\", linestyle=plot_linestyles[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "在最终性能表现上,前4个模型相差无几,正确率都能达到$89\\%$左右。在训练过程中,深层网络的收敛速度慢。当模型层数增加到4以上时,模型无法正常收敛。\n", - "\n", - "以上两个现象可分析得知:深层网络容易出现梯度消失的问题。需要采用残差网络等结构优化。\n", - "\n", - "在用时上,由于模型较小,数据集也较小,GPU算力较高,用时基本没有差别。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 任务五\n", - "\n", - "**在多分类任务实验中分别手动实现和用torch.nn实现dropout**\n", - "\n", - "- 探究不同丢弃率对实验结果的影响(可用loss曲线进行展示)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "手动实现Dropout。\n", - "\n", - "由于需要判断此时传入的张量是否是训练状态,需要继承`nn.Module`来获取`self.training`,否则需要手动传入training参数。这里采取前者。" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "输入:\n", - "tensor([[ 1., 2., 3., 4., 5.],\n", - " [ 6., 7., 8., 9., 10.]])\n", - "My_Dropout输出:\n", - "tensor([[ 2., 0., 0., 0., 0.],\n", - " [ 0., 14., 0., 0., 0.]])\n", - "nn.Dropout输出:\n", - "tensor([[ 0., 4., 6., 8., 0.],\n", - " [12., 14., 0., 18., 0.]])\n" - ] - } - ], - "source": [ - "class My_Dropout(nn.Module):\n", - " def __init__(self, p=0.5):\n", - " super().__init__()\n", - " self.p = p\n", - " self.mask = None\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " if self.training:\n", - " mask = (torch.rand(x.shape, device=x.device) > self.p).to(x.dtype)\n", - " return x * mask / (1 - self.p)\n", - " else:\n", - " return x\n", - " \n", - "\n", - "# 测试\n", - "my_dropout = My_Dropout(p=0.5)\n", - "nn_dropout = nn.Dropout(p=0.5)\n", - "x = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0],\n", - " [6.0, 7.0, 8.0, 9.0, 10.0]])\n", - "print(f\"输入:\\n{x}\")\n", - "output_my_dropout = my_dropout(x)\n", - "output_nn_dropout = nn_dropout(x)\n", - "print(f\"My_Dropout输出:\\n{output_my_dropout}\")\n", - "print(f\"nn.Dropout输出:\\n{output_nn_dropout}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "使用手动实现的Dropout进行多分类任务训练。" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9dbe15ae148c48f1b3b63f5fc56aaa98", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/590 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class Model_5_1():\n", - " def __init__(\n", - " self, \n", - " dropout_p: float = 0.5,\n", - " input_features: int = 784, \n", - " num_classes: int = 10\n", - " ):\n", - " self.flatten = My_Flatten()\n", - " self.fc1 = My_Linear(in_features=input_features, out_features=1024)\n", - " self.leakyrelu = My_LeakyReLU()\n", - " self.dropout = My_Dropout(p=dropout_p)\n", - 
" self.fc2 = My_Linear(in_features=1024, out_features=num_classes)\n", - " self.params = self.fc1.params + self.fc2.params\n", - "\n", - " def __call__(self, x: torch.Tensor):\n", - " return self.forward(x)\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " x = self.flatten(x)\n", - " x = self.dropout(self.leakyrelu(self.fc1(x)))\n", - " x = self.fc2(x)\n", - " return x\n", - "\n", - " def to(self, device: str):\n", - " for param in self.params:\n", - " param.data = param.data.to(device=device)\n", - " return self\n", - "\n", - " def parameters(self):\n", - " return self.params\n", - " \n", - " def train(self):\n", - " for param in self.params:\n", - " param.requires_grad = True\n", - " \n", - " def eval(self):\n", - " for param in self.params:\n", - " param.requires_grad = False\n", - "\n", - "\n", - "hyper_params = {\n", - " 'learning_rate': 6.0e-2,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "model = Model_5_1(\n", - " input_features=image_width * image_height, \n", - " num_classes=num_classes\n", - ").to(device)\n", - "\n", - "trainer = MultiCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_mnist_dataset, eval_dataset=test_mnist_dataset, \n", - " optimizer='manual', criterion='manual', **hyper_params\n", - ")\n", - "_ = trainer.train()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "探究不同丢弃率对实验结果的影响。" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(丢弃率为0.0)开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "00396f5e8fd24b02ac36d992f605f531", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/590 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class Model_5_2(nn.Module):\n", - " def __init__(\n", - " self, \n", - " dropout_p: float = 
None,\n", - " input_features: int = 784, \n", - " num_classes: int = 10\n", - " ):\n", - " super().__init__()\n", - " self.net = nn.Sequential(\n", - " nn.Flatten(),\n", - " nn.Linear(in_features=input_features, out_features=4096),\n", - " nn.LeakyReLU(),\n", - " nn.Dropout(p=dropout_p),\n", - " nn.Linear(in_features=4096, out_features=4096),\n", - " nn.LeakyReLU(),\n", - " nn.Dropout(p=dropout_p),\n", - " nn.Linear(in_features=4096, out_features=4096),\n", - " nn.LeakyReLU(),\n", - " nn.Dropout(p=dropout_p),\n", - " nn.Linear(in_features=4096, out_features=4096),\n", - " nn.LeakyReLU(),\n", - " nn.Dropout(p=dropout_p),\n", - " nn.Linear(in_features=4096, out_features=num_classes)\n", - " )\n", - "\n", - " def forward(self, x: torch.Tensor):\n", - " return self.net(x)\n", - " \n", - "dropout_ratios = [0.0, 0.2, 0.5, 0.9]\n", - "plot_colors = ['blue', 'green', 'orange', 'purple']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Epoch')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Epoch')\n", - "axes[1].set_ylabel('Accuracy')\n", - "axes[1].set_title('Validation Accuracy Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "hyper_params = {\n", - " 'learning_rate': 6.0e-2,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "for index, dropout_ratio in enumerate(dropout_ratios):\n", - " model = Model_5_2(dropout_p=dropout_ratio, input_features=image_width * image_height, num_classes=num_classes).to(device)\n", - " \n", - " print(f\"模型{index + 1}(丢弃率为{dropout_ratio})开始训练:\")\n", - " trainer = MultiCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_mnist_dataset, eval_dataset=test_mnist_dataset, \n", - " optimizer='torch', criterion='torch', **hyper_params, \n", - " plot=False, return_curves=True, log_epoch=10\n", - 
" )\n", - " curves = trainer.train()['curves']\n", - "\n", - " axes[0].plot(\n", - " range(1, len(curves['eval_loss_curve']) + 1), curves['eval_loss_curve'],\n", - " label=f\"dropout={dropout_ratio}\", color=plot_colors[index]\n", - " )\n", - " axes[1].plot(\n", - " range(1, len(curves['eval_acc_curve']) + 1), curves['eval_acc_curve'], \n", - " label=f\"dropout={dropout_ratio}\", color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 任务六\n", - "\n", - "**在多分类任务实验中分别手动实现和用torch.nn实现$L_2$正则化**\n", - "\n", - "- 探究惩罚项的权重对实验结果的影响(可用loss曲线进行展示)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "$L_2$正则化,又称权重衰减,pytorch的的实现是将$L_2$正则化的系数作为优化器的`weight_decay`参数传入,在`step()`的过程中计算完成。具体原理如下:\n", - "\n", - "$L_2$正则化的公式是\n", - "\n", - "$$\n", - "L = L_0 + \\frac{\\lambda }{2n} \\sum_{w}^{}w^2\n", - "$$\n", - "\n", - "其中$L$是进行$L_2$正则化后的损失,$L_0$是损失函数计算出来的原损失,$\\lambda$是$L_2$正则化系数/惩罚项权重(即optimizer的`weight_decay`参数),$n$是样本大小。\n", - "\n", - "反向传播:\n", - "\n", - "$$\n", - "\\begin{align}\n", - "\\frac{\\partial L}{\\partial w} & = \\frac{\\partial L_0}{\\partial w} + \\sum_{w}^{} \\frac{\\lambda }{n} w \\\\\n", - "\\frac{\\partial L}{\\partial b} & = \\frac{\\partial L_0}{\\partial b}\n", - "\\end{align}\n", - "$$\n", - "\n", - "所以,参数更新为:\n", - "\n", - "$$\n", - "\\begin{align}\n", - "w: & = w - \\frac{\\eta}{n}\\frac{\\partial L_0}{\\partial w} - \\frac{\\eta \\lambda }{n}\\sum_{w}^{}w \\\\\n", - "b: & = b - \\frac{\\eta}{n}\\frac{\\partial L_0}{\\partial b}\n", - "\\end{align}\n", - "$$\n", - "\n", - "其中$\\eta$是学习率。\n", - "\n", - "所以,手动在优化器中实现$L_2$正则化如下:" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": 
[ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "params1的梯度为:\n", - " tensor([[2., 2.]])\n", - "params2的梯度为:\n", - " tensor([[2., 2.]])\n", - "经过L_2正则化后的My_SGD反向传播结果:\n", - " tensor([[-0.0500, 0.9000]])\n", - "经过L_2正则化后的torch.optim.SGD反向传播结果:\n", - " tensor([[-0.0500, 0.9000]])\n" - ] - } - ], - "source": [ - "class My_SGD:\n", - " def __init__(self, params: list[torch.Tensor], lr: float, weight_decay=0.0):\n", - " self.params = params\n", - " self.lr = lr\n", - " self.weight_decay = weight_decay\n", - "\n", - " def step(self):\n", - " with torch.no_grad():\n", - " for param in self.params:\n", - " if param.grad is not None:\n", - " if len(param.data.shape) > 1:\n", - " param.data = param.data - self.lr * (param.grad + self.weight_decay * param.data)\n", - " else:\n", - " param.data = param.data - self.lr * param.grad\n", - "\n", - " def zero_grad(self):\n", - " for param in self.params:\n", - " if param.grad is not None:\n", - " param.grad.data = torch.zeros_like(param.grad.data)\n", - "\n", - "\n", - "# 测试\n", - "params1 = torch.tensor([[1., 2, ]], requires_grad=True)\n", - "params2 = torch.tensor([[1., 2, ]], requires_grad=True)\n", - "\n", - "my_sgd = My_SGD(params=[params1], lr=0.5, weight_decay=0.1)\n", - "optim_sgd = torch.optim.SGD(params=[params2], lr=0.5, weight_decay=0.1)\n", - "my_sgd.zero_grad()\n", - "optim_sgd.zero_grad()\n", - "\n", - "loss1 = 2 * params1.sum()\n", - "loss2 = 2 * params2.sum()\n", - " # 偏导为2\n", - "loss1.backward()\n", - "loss2.backward()\n", - "print(\"params1的梯度为:\\n\", params1.grad.data)\n", - "print(\"params2的梯度为:\\n\", params2.grad.data)\n", - "\n", - "my_sgd.step()\n", - "optim_sgd.step()\n", - "# 结果为:w - lr * grad - lr * weight_decay_rate * w\n", - "# w[0] = 1 - 0.5 * 2 - 0.5 * 0.1 * 1 = -0.0500\n", - "# w[1] = 2 - 0.5 * 2 - 0.5 * 0.1 * 2 = 0.9000\n", - "print(\"经过L_2正则化后的My_SGD反向传播结果:\\n\", params1.data)\n", - "print(\"经过L_2正则化后的torch.optim.SGD反向传播结果:\\n\", params2.data)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "使用手动实现的带$L_2$正则项的优化器进行多分类任务训练。" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3e2d7d0e1bee4e9198ddbac0c330bfdf", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/590 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "hyper_params = {\n", - " 'learning_rate': 6.0e-2,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - " 'weight_decay': 0.1\n", - "}\n", - "\n", - "model = Model_5_1(\n", - " dropout_p=0.0,\n", - " input_features=image_width * image_height, \n", - " num_classes=num_classes\n", - ").to(device)\n", - "\n", - "trainer = MultiCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_mnist_dataset, eval_dataset=test_mnist_dataset, \n", - " optimizer=My_SGD, criterion='manual', **hyper_params\n", - ")\n", - "_ = trainer.train()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "探究惩罚项的权重对实验结果的影响。" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(训练正则项系数为0.0)开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6b3d5ec1bdba4ed7ba4cc88234ed38d7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/590 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "weight_decays = [0.0, 0.1, 0.5, 0.9]\n", - "plot_colors = ['blue', 'green', 'orange', 'purple']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Epoch')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Epoch')\n", - 
"axes[1].set_ylabel('Accuracy')\n", - "axes[1].set_title('Validation Accuracy Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "hyper_params_list = [\n", - " {\n", - " 'learning_rate': 6.0e-2, 'num_epochs': 10, 'batch_size': 1024,\n", - " 'weight_decay': weight_decay\n", - " } \n", - " for weight_decay in weight_decays\n", - "]\n", - "\n", - "for index, hyper_params in enumerate(hyper_params_list):\n", - " model = Model_5_1(dropout_p=0.0, input_features=image_width * image_height, num_classes=num_classes).to(device)\n", - " \n", - " print(f\"模型{index + 1}(训练正则项系数为{hyper_params['weight_decay']})开始训练:\")\n", - " trainer = MultiCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_mnist_dataset, eval_dataset=test_mnist_dataset, \n", - " optimizer='torch', criterion='torch', **hyper_params, \n", - " plot=False, return_curves=True, log_epoch=10\n", - " )\n", - " curves = trainer.train()['curves']\n", - "\n", - " axes[0].plot(\n", - " range(1, len(curves['eval_loss_curve']) + 1), curves['eval_loss_curve'],\n", - " label=f\"weight_decay={hyper_params['weight_decay']}\", color=plot_colors[index]\n", - " )\n", - " axes[1].plot(\n", - " range(1, len(curves['eval_acc_curve']) + 1), curves['eval_acc_curve'], \n", - " label=f\"weight_decay={hyper_params['weight_decay']}\", color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 任务七\n", - "\n", - "**对回归、二分类、多分类任务分别选择上述实验中效果最好的模型,采用10折交叉验证评估实验结果**\n", - "\n", - "- 要求除了最终结果外还需以表格的形式展示每折的实验结果\n" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "制造$k$折数据集,将原先的训练集和测试集混合,然后按照$k-1:1$的比例,按照顺序拆分出$k$对训练集和测试集。" - ] - }, - { - "cell_type": "code", - 
"execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "class KFoldDatasetSplitter:\n", - " def __init__(self, dataset, k_folds: int=10, shuffle=True):\n", - " self.dataset = dataset\n", - " self.k_folds = k_folds\n", - " self.shuffle = shuffle\n", - "\n", - " self.kfold = KFold(n_splits=k_folds, shuffle=shuffle, random_state=seed)\n", - " self.indices = list(range(len(dataset)))\n", - " self.splits = list(self.kfold.split(self.indices))\n", - "\n", - " def get_fold(self, fold_idx: int):\n", - " assert 0 <= fold_idx and fold_idx < self.k_folds, \"fold_idx out of range\"\n", - "\n", - " train_idx, val_idx = self.splits[fold_idx]\n", - " train_subset = Subset(self.dataset, train_idx)\n", - " val_subset = Subset(self.dataset, val_idx)\n", - "\n", - " return train_subset, val_subset\n", - "\n", - " def __len__(self):\n", - " return self.k_folds" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "回归任务。" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型使用第1组数据开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0641e17cd94442ccbd2c7afdac1e8a07", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/70 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "k_fold_splitter = KFoldDatasetSplitter(train_regression_dataset, k_folds=10, shuffle=True)\n", - "\n", - "hyper_params = {\n", - " 'learning_rate': 1.0e-1,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "eval_errors = []\n", - "test_errors = []\n", - "\n", - "for index in range(len(k_fold_splitter)):\n", - " model = Model_2_1().to(device)\n", - " train_dataset, eval_dataset = k_fold_splitter.get_fold(fold_idx=index)\n", - " print(f\"模型使用第{index + 1}组数据开始训练:\")\n", - " trainer = RegressionTrainer(\n", - " model=model, \n", - " 
train_dataset=train_dataset, eval_dataset=eval_dataset, test_dataset=test_regression_dataset,\n", - " optimizer='torch', criterion='torch', **hyper_params,\n", - " plot=False, print_test_result=False, return_curves=True, log_epoch=0\n", - " )\n", - " train_result = trainer.train()\n", - " eval_errors.append(train_result['curves']['eval_error_curve'][-1])\n", - " test_errors.append(train_result['test_error'])\n", - "\n", - "fig, ax = plt.subplots(figsize=(7, 3.5))\n", - "\n", - "fold_indices = list(range(1, len(k_fold_splitter) + 1))\n", - "bar_width = 0.35\n", - "x = np.arange(len(fold_indices))\n", - "\n", - "val_bars = ax.bar(x - bar_width / 2, eval_errors, width=bar_width, label='Validation Error', color='blue')\n", - "test_bars = ax.bar(x + bar_width / 2, test_errors, width=bar_width, label='Test Error', color='green')\n", - "\n", - "ax.set_xlabel('Fold Index')\n", - "ax.set_ylabel('Error')\n", - "ax.set_title('Validation vs Test Error per Fold')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(fold_indices)\n", - "ax.legend()\n", - "ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "for bar in val_bars:\n", - " height = bar.get_height()\n", - " ax.text(bar.get_x() + bar.get_width() / 2, height + 0.01, f'{height:.2f}', ha='center', va='bottom', fontsize=6)\n", - "for bar in test_bars:\n", - " height = bar.get_height()\n", - " ax.text(bar.get_x() + bar.get_width() / 2, height + 0.01, f'{height:.2f}', ha='center', va='bottom', fontsize=6)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "二分类任务。" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型使用第1组数据开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c57b0a4859784e3280f53bf5f4164c33", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 
0/130 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "k_fold_splitter = KFoldDatasetSplitter(train_binarycls_dataset, k_folds=10, shuffle=True)\n", - "\n", - "hyper_params = {\n", - " 'learning_rate': 5.0e-4,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "eval_accs = []\n", - "test_accs = []\n", - "\n", - "for index in range(len(k_fold_splitter)):\n", - " model = Model_2_2().to(device)\n", - " train_dataset, eval_dataset = k_fold_splitter.get_fold(fold_idx=index)\n", - " print(f\"模型使用第{index + 1}组数据开始训练:\")\n", - " trainer = BinaryCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_dataset, eval_dataset=eval_dataset, test_dataset=test_binarycls_dataset,\n", - " optimizer='torch', criterion='torch', **hyper_params,\n", - " plot=False, print_test_result=False, return_curves=True, log_epoch=0\n", - " )\n", - " train_result = trainer.train()\n", - " eval_accs.append(train_result['curves']['eval_acc_curve'][-1])\n", - " test_accs.append(train_result['test_acc'])\n", - "\n", - "fig, ax = plt.subplots(figsize=(7, 3.5))\n", - "\n", - "fold_indices = list(range(1, len(k_fold_splitter) + 1))\n", - "bar_width = 0.35\n", - "x = np.arange(len(fold_indices))\n", - "\n", - "val_bars = ax.bar(x - bar_width / 2, eval_accs, width=bar_width, label='Validation Accuracy', color='blue')\n", - "test_bars = ax.bar(x + bar_width / 2, test_accs, width=bar_width, label='Test Accuracy', color='green')\n", - "\n", - "ax.set_xlabel('Fold Index')\n", - "ax.set_ylabel('Accuracy')\n", - "ax.set_title('Validation vs Test Accuracy per Fold')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(fold_indices)\n", - "ax.legend()\n", - "ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "for bar in val_bars:\n", - " height = bar.get_height()\n", - " ax.text(bar.get_x() + bar.get_width() / 2, height + 0.01, f'{height:.1%}', ha='center', va='bottom', fontsize=6)\n", - "for bar in test_bars:\n", - " height 
= bar.get_height()\n", - " ax.text(bar.get_x() + bar.get_width() / 2, height + 0.01, f'{height:.1%}', ha='center', va='bottom', fontsize=6)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "多分类任务。" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型使用第1组数据开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f652aaa3f4b4473d9bbbf665dc40f192", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/530 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "k_fold_splitter = KFoldDatasetSplitter(train_mnist_dataset, k_folds=10, shuffle=True)\n", - "\n", - "hyper_params = {\n", - " 'learning_rate': 6.0e-2,\n", - " 'num_epochs': 10,\n", - " 'batch_size': 1024,\n", - "}\n", - "\n", - "eval_accs = []\n", - "test_accs = []\n", - "\n", - "for index in range(len(k_fold_splitter)):\n", - " model = Model_4(\n", - " num_hidden_layer=4, hidden_size=1024, \n", - " input_features=image_width * image_height, num_classes=num_classes\n", - " ).to(device)\n", - " train_dataset, eval_dataset = k_fold_splitter.get_fold(fold_idx=index)\n", - " print(f\"模型使用第{index + 1}组数据开始训练:\")\n", - " trainer = MultiCLSTrainer(\n", - " model=model, \n", - " train_dataset=train_dataset, eval_dataset=eval_dataset, test_dataset=test_mnist_dataset,\n", - " optimizer='torch', criterion='torch', **hyper_params,\n", - " plot=False, print_test_result=False, return_curves=True, log_epoch=0\n", - " )\n", - " train_result = trainer.train()\n", - " eval_accs.append(train_result['curves']['eval_acc_curve'][-1])\n", - " test_accs.append(train_result['test_acc'])\n", - "\n", - "fig, ax = plt.subplots(figsize=(7, 3.5))\n", - "\n", - "fold_indices = list(range(1, len(k_fold_splitter) + 1))\n", - "bar_width = 0.35\n", - "x = 
np.arange(len(fold_indices))\n", - "\n", - "val_bars = ax.bar(x - bar_width / 2, eval_accs, width=bar_width, label='Validation Accuracy', color='blue')\n", - "test_bars = ax.bar(x + bar_width / 2, test_accs, width=bar_width, label='Test Accuracy', color='green')\n", - "\n", - "ax.set_xlabel('Fold Index')\n", - "ax.set_ylabel('Accuracy')\n", - "ax.set_title('Validation vs Test Accuracy per Fold')\n", - "ax.set_xticks(x)\n", - "ax.set_xticklabels(fold_indices)\n", - "ax.legend()\n", - "ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "for bar in val_bars:\n", - " height = bar.get_height()\n", - " ax.text(bar.get_x() + bar.get_width() / 2, height + 0.01, f'{height:.1%}', ha='center', va='bottom', fontsize=5.5)\n", - "for bar in test_bars:\n", - " height = bar.get_height()\n", - " ax.text(bar.get_x() + bar.get_width() / 2, height + 0.01, f'{height:.1%}', ha='center', va='bottom', fontsize=5.5)\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/Lab3/.ipynb_checkpoints/卷积神经网络实验-checkpoint.ipynb b/Lab3/.ipynb_checkpoints/卷积神经网络实验-checkpoint.ipynb deleted file mode 100644 index 75c0279..0000000 --- a/Lab3/.ipynb_checkpoints/卷积神经网络实验-checkpoint.ipynb +++ /dev/null @@ -1,1692 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "64715782-d3fc-42dd-8d82-9b97bbf8c7bf", - "metadata": {}, - "source": [ - "

研究生《深度学习》课程
实验报告

\n", - "
\n", - "
课程名称:深度学习 M502019B
\n", - "
实验题目:卷积神经网络实验
\n", - "
学号:25120323
\n", - "
姓名:柯劲帆
\n", - "
授课老师:原继东
\n", - "
报告日期:2025年8月13日
\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "74dbbe2c-7b00-40c7-964b-bd01e2835292", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Pytorch version: 2.7.1+cu118\n", - "CUDA version: 11.8\n", - "CUDA device count: 1\n", - "CUDA device name: NVIDIA TITAN Xp\n", - "CUDA device capability: (6, 1)\n", - "CUDA device memory: 11.90 GB\n", - "CPU count: 8\n" - ] - } - ], - "source": [ - "import os\n", - "import numpy as np\n", - "import torch\n", - "from torch.autograd import Variable\n", - "from torch.utils.data import Dataset, DataLoader, Subset, random_split\n", - "from torch import nn\n", - "from torchvision import datasets, transforms\n", - "from PIL import Image\n", - "from multiprocessing import cpu_count\n", - "import matplotlib.pyplot as plt\n", - "from tqdm.notebook import tqdm\n", - "import pandas as pd\n", - "import collections\n", - "from typing import Literal, Union, Optional, List\n", - "\n", - "print('Pytorch version:',torch.__version__)\n", - "if not torch.cuda.is_available():\n", - " print('CUDA is_available:', torch.cuda.is_available())\n", - "else:\n", - " print('CUDA version:', torch.version.cuda)\n", - " print('CUDA device count:', torch.cuda.device_count())\n", - " print('CUDA device name:', torch.cuda.get_device_name())\n", - " print('CUDA device capability:', torch.cuda.get_device_capability())\n", - " print('CUDA device memory:', f'{torch.cuda.get_device_properties(0).total_memory/1024/1024/1024:.2f}', 'GB')\n", - "print('CPU count:', cpu_count())\n", - "\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - "seed = 42\n", - "np.random.seed(seed)\n", - "torch.manual_seed(seed)\n", - "torch.cuda.manual_seed(seed)\n", - "cpu_count = cpu_count()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1195679d-2174-425f-ab51-86b9ce66dc5c", - "metadata": {}, - "source": [ - "# 1. 
二维卷积实验\n", - "\n", - "- 手写二维卷积的实现,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)(只用循环几轮即可)\n", - "- 使用torch.nn实现二维卷积,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)\n", - "- 不同超参数的对比分析(包括卷积层数、卷积核大小、batchsize、lr等)选其中至少1-2个进行分析\n", - "- 使用PyTorch实现经典模型AlexNet并在至少一个数据集进行试验分析" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "58d823c9-e690-4a63-bee6-49f9b1485e90", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "c659cade-b5aa-4530-9537-bfbe9d4d35cf", - "metadata": {}, - "source": [ - "创建数据集。\n", - "\n", - "**车辆分类数据集**\n", - "\n", - "- 输入图片,输出对应的类别\n", - "- 共1358张车辆图片\n", - "- 分别属于汽车、客车和货车三类\n", - " - 汽车:779张\n", - " - 客车:218张\n", - " - 货车:360张\n", - "- 每个类别随机取20-30%当作测试集\n", - "- 各图片的大小不一,需要将图片拉伸到相同大小\n", - "\n", - "对于原数据集进行`8:2`划分处理。将各个类别的数据分别进行划分。\n", - "\n", - "这里已经将数据集划分完毕,将各部分数据的路径和列表保存在csv文件中。划分代码`dataset/Vehicles/split_dataset.py`内容如下:\n", - "\n", - "```python\n", - "import os\n", - "import random\n", - "import pandas as pd\n", - "\n", - "train_list = list()\n", - "test_list = list()\n", - "\n", - "root_dir = \"raw\"\n", - "class_index = 0\n", - "for vehicle in os.listdir(root_dir):\n", - " img_list = [i for i in os.listdir(os.path.join(root_dir, vehicle)) if i.endswith(\".jpg\")]\n", - " random.shuffle(img_list)\n", - " split_num = int(len(img_list) * 0.8)\n", - " for img in img_list[0 : split_num]:\n", - " train_list.append([os.path.join(root_dir, vehicle, img), class_index])\n", - " for img in img_list[split_num : ]:\n", - " test_list.append([os.path.join(root_dir, vehicle, img), class_index])\n", - " class_index += 1\n", - "\n", - "train_list.sort()\n", - "test_list.sort()\n", - "\n", - "pd.DataFrame(data=train_list, columns=[\"Vehicle\", \"Label\"]).to_csv(\"./train.csv\", index=False)\n", - "pd.DataFrame(data=test_list, columns=[\"Vehicle\", \"Label\"]).to_csv(\"./test.csv\", index=False)\n", - "```" - ] - }, - { - "cell_type": "code", - 
"execution_count": 3, - "id": "0209565b-6409-4615-9f4e-d015a0fbe839", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vehicle Train Dataset Size: 1085\n", - "Vehicle Test Dataset Size: 272\n", - "A Train Sample:\n", - "\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAIcAAACdCAYAAACeqmv3AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjUsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvWftoOwAAAAlwSFlzAAAPYQAAD2EBqD+naQAACWRJREFUeJzt3VtIFV0bB/D/fH6mIpEoWiBhiAYGBpFUiJIdQCIJg+gmqAi6iIIQKuqi9KoIk8IMCqITXYpFVHSV3oRoEUlGlolCRphmmREZstd387nf7YzPHNfsQ+//B17s2WvPrL15XMdZawyllALRAv6T6AxQ8mJwkIjBQSIGB4kYHCRicJCIwUEiBgeJGBwk+quDY2RkBIZh4MKFC9rO2dXVBcMw0NXVpe2cySrpguPWrVswDAMvXrxIdFZC8e7dOzQ0NKCyshKZmZkwDAMjIyOJztaCki44/nbd3d1obW3F9PQ0ysrKEp0dWwyOONuxYwe+f/+O169fY8+ePYnOjq2UDI4/f/7gzJkzWLt2LZYsWYLs7GxUV1ejs7NT/MzFixdRVFSErKwsbNy4Ef39/ZY0AwMD2LVrF3Jzc5GZmYmKigo8ePDAMT+/fv3CwMAAJiYmHNPm5uZi8eLFjumSQUoGx48fP3D9+nXU1NTg/PnzaGpqwvj4OGpra/Hq1StL+jt37qC1tRWHDx/GqVOn0N/fj82bN2NsbCya5s2bN9iwYQPevn2LkydPoqWlBdnZ2aivr8e9e/ds89Pb24uysjK0tbXp/qqJpZLMzZs3FQD1/PlzMc3s7KyamZmZd+zbt29q6dKl6sCBA9Fjw8PDCoDKyspSo6Oj0eM9PT0KgGpoaIge27JliyovL1e/f/+OHotEIqqyslKVlpZGj3V2dioAqrOz03KssbHR03dtbm5WANTw8LCnz8VLSpYcaWlpWLRoEQAgEolgcnISs7OzqKiowMuXLy3p6+vrUVhYGH29bt06rF+/Ho8fPwYATE5O4unTp9i9ezemp6cxMTGBiYkJfP36FbW1tRgcHMSnT5/E/NTU1EAphaamJr1fNMFSMjgA4Pbt21i9ejUyMzORl5eH/Px8PHr0CFNTU5a0paWllmMrV66MdiE/fPgApRROnz6N/Pz8eX+NjY0AgC9fvoT6fZLRfxOdAT/u3r2L/fv3o76+HsePH0dBQQHS0tJw7tw5DA0NeT5fJBIBABw7dgy1tbULpikpKQmU51SUksHR3t6O4uJidHR0wDCM6PG5/3KzwcFBy7H3799jxYoVAIDi4mIAQHp6OrZu3ao/wykqJauVtLQ0AICKuTe6p6cH3d3dC6a/f//+vDZDb28venp6sG3bNgBAQUEBampqcO3aNXz+/Nny+fHxcdv8eOnKppKkLTlu3LiBJ0+eWI4fPXoUdXV16OjowM6dO7F9+3YMDw/j6tWrWLVqFX7+/Gn5TElJCaqqqnDo0CHMzMzg0qVLyMvLw4kTJ6Jprly5gqqqKpSXl+PgwYMoLi7G2NgYuru7MTo6ir6+PjGvvb292LRpExobGx0bpVNTU7h8+TIA4NmzZwCAtrY25OTkICcnB0eOHHHz88RHgntLFnNdWenv48ePKhKJqLNnz6qioiKVkZGh1qxZox4+fKj27dunioqKouea68o2NzerlpYWtXz5cpWRkaGqq6tVX1+f5
dpDQ0Nq7969atmyZSo9PV0VFhaquro61d7eHk0TtCs7l6eF/mLzngwMpbhuhRaWkm0Oig8GB4kYHCRicJCIwUEiBgeJGBwkcj1Cahge4siwec88qmKT1u40QSjxxYIH4s4uB5p+WrgZ3mLJQSIGB4n0TLwZti/t3zRiD5iKupBKeEN8YT2Q7FVQLPNXCToxwpKDRAwOEmmpVgL1KrRNCsvVk9/Wv6drWsp0u8/Zf2ebinbea8e8253IBZYcJGJwkIjBQSL/bY6Y+szLyJy3a5jOZNs+kd+z5sd9Do3Y83r5Yuasz3vhqYEicvrdAzY5WHKQjMFBonC6srHVgVNXNSatU6mtXJ7XuQj9J4W3KtD/LKERk18vtYqXCsiumvFUQ/8fSw4SMThIxOAgkf82R0ydZa5D50WcubLzcgnzJK3foXabitvTGQ2b1EFmAfw0CFzwNNS+AJYcJGJwkIjBQSItw+e6hsutbYyIuww4nthXdsI7j1PWbcZz/A6Jc/ictGJwkCjhO/vEdk+9dVXlwWIFuSj2LoTVM56mseV+uJ65XRlLDhIxOEjE4CCRnuFzc20X0jol+88GmcL3f1W3DLuGhbcxfL8f9IwlB4kYHCRicJBIz22CpilnFYkZu9BxAa8ScVGH4RBLuyyEy+ge92DJQSIGB4ncVys2e3DYrxl2GCv2OTrtZcQ5yKIrw2ZY3u4aYbGbYTDfUMZFTRQaBgeJGBwk8t2Vnb8w2LRZSpI9pUPbqjYP0+dmvn8RTT+ln24uSw4SMThIpGlW1k44+xCHV3HpuaFXXxY89F01Y8lBIgYHiRgcJHLf5rDb2tnynpc9t9wLbz9Rd+JxDcBD28Z8l52emYkolhwkYnCQiMFBIg/jHOYhcj0Z8L3BiOWudi8Z8rcKPF5tDvvBFfd3sSt/XzOKJQeJGBwkSvhCat/7c+laHmXZFNTLFX1Wio5TuMkxq82Sg0QMDhIxOEjkoc0xv6Kc9wxiy51gAXKkRdjbmkjXcfeWc3bctl+cThSs882Sg0QMDhIxOEjke5wjLrfP2TYdAjxTK4wce7mEruyYbhPUPbzPkoNEDA4S+Z6VDfJIq8SLw96iXtL6rGbC/plZcpCIwUEiBgeJEj9l74XfSjbhw/kOfN7E5nQa3n1OoWFwkCicaiXoYwl1Xj/VBbjBjE+HpNAwOEjE4CCR/z3B3N7ulfL1f4KXb9s9BNth4TT3IaXQMDhIxOAgUSjjHPMeqOyUOKXbJPF6RLW7pPZ70Lu/xByWHCRicJAolGrFU+dP10MTw2K30NrLwiVNvV5Pa6Xsxs9d/LgsOUjE4CARg4NEWtocQW68jmU3/Gv+bKB1yl74bSuENLKuaw81N1hykIjBQSIGB4n0jHOEsCepY9qkX1Wnic14iePCuYBz9iw5SMTgIJGm4fMEDHTbbPsc5OlWid/PzD+7asZSDXP4nIJgcJCIwUEiTW2OeO37aeefayZ/u8F/oyiejzJlyUEiBgeJtFQr5q5j8hfrySWsB0sHvBGMJQfJGBwkYnCQKKRFTXIl6noB9l9Hf8PCSzuCC6lJKwYHiRgcJIr7PqTB2iP/ltu//NE9icGSg0QMDhIl1fbW5irnr+31mqcb7N/Wchl2ZUkrBgeJGBwkMtS/dzybHLDkIBGDg0QMDhIxOEjE4CARg4NEDA4SMThIxOAg0f8AFhzOq9bpiZQAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Image Type': , 'Image Shape': torch.Size([3, 32, 32]), 'Label Type': , 'Label Value': 1}\n" - ] - } - ], - "source": [ - "class Vehicle(Dataset):\n", - " def __init__(self, root: str=\"./dataset\", train: bool=True, transform=None):\n", - " root = os.path.join(root, \"Vehicles\")\n", - " csv_file = os.path.join(root, \"train.csv\" if train else \"test.csv\")\n", - " self.data = pd.read_csv(csv_file)\n", - " self.root = root\n", - " self.transform = transform\n", - "\n", - " def __len__(self):\n", - " return len(self.data)\n", - " \n", - " def __getitem__(self, index):\n", - " row = self.data.iloc[index]\n", - " img_name, label = row['Vehicle'], row['Label']\n", - " img_path = os.path.join(self.root, img_name)\n", - " image = Image.open(img_path)\n", - " label = int(label)\n", - " if self.transform:\n", - " image = self.transform(image)\n", - " return image, label\n", - "\n", - "image_size = 32\n", - "transform = transforms.Compose(\n", - " [\n", - " transforms.ToTensor(),\n", - " transforms.Resize((image_size, image_size), antialias=True),\n", - " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n", - " ]\n", - ")\n", - "train_vehicle_dataset = Vehicle(root=\"./dataset\", train=True, transform=transform)\n", - "test_vehicle_dataset = Vehicle(root=\"./dataset\", train=False, transform=transform)\n", - "\n", - "print('Vehicle Train Dataset Size:', len(train_vehicle_dataset))\n", - "print('Vehicle Test Dataset Size:', len(test_vehicle_dataset))\n", - "\n", - "image, label = train_vehicle_dataset[0]\n", - "sample = {\n", - " 'Image Type': type(image),\n", - " 'Image Shape': image.shape,\n", - " 'Label Type': type(label),\n", - " 'Label Value': label\n", - "}\n", - "print('A Train Sample:\\n')\n", - "plt.figure(figsize=(1.5, 1.5))\n", - "plt.imshow(image.permute(1, 2, 0).numpy().astype(np.uint8) / 255)\n", - 
"plt.title(f\"Label: {label}\")\n", - "plt.axis('off')\n", - "plt.show()\n", - "print(sample)\n", - "\n", - "num_classes = 3" - ] - }, - { - "cell_type": "markdown", - "id": "a6917da1-4db8-4dba-b140-41d89e25ff4d", - "metadata": {}, - "source": [ - "定义多分类任务的trainer。" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "128a7e24-939d-4374-90d2-3f1a1fa682a7", - "metadata": {}, - "outputs": [], - "source": [ - "class MultiCLSTrainer():\n", - " def __init__(\n", - " self,\n", - " model,\n", - " train_dataset: Union[Dataset, DataLoader],\n", - " eval_dataset: Union[Dataset, DataLoader],\n", - " learning_rate: float,\n", - " num_epochs: int,\n", - " batch_size: int,\n", - " weight_decay: float = 0.0,\n", - " adam_beta1: float = 0.9,\n", - " adam_beta2: float = 0.999,\n", - " test_dataset: Union[Dataset, DataLoader] = None,\n", - " plot: bool = True, \n", - " print_test_result: bool = True,\n", - " logging_steps: int = 1,\n", - " eval_steps: int = 1,\n", - " print_log_epochs: int = 1,\n", - " print_eval: bool = True\n", - " ):\n", - " self.model = model\n", - " self.learning_rate = learning_rate\n", - " self.num_epochs = num_epochs\n", - " self.batch_size = batch_size\n", - " self.plot = plot\n", - " self.print_test_result = print_test_result\n", - " self.logging_steps = logging_steps\n", - " self.eval_steps = eval_steps\n", - " self.print_log_epochs = print_log_epochs\n", - " self.print_eval = print_eval\n", - " \n", - " if isinstance(train_dataset, Dataset):\n", - " self.train_dataloader = DataLoader(\n", - " dataset=train_dataset, batch_size=batch_size, shuffle=True, \n", - " num_workers=cpu_count-1, pin_memory=True\n", - " )\n", - " else:\n", - " self.train_dataloader = train_dataset\n", - " if isinstance(eval_dataset, Dataset):\n", - " self.eval_dataloader = DataLoader(\n", - " dataset=eval_dataset, batch_size=batch_size, shuffle=True, \n", - " num_workers=cpu_count-1, pin_memory=True\n", - " )\n", - " else:\n", - " self.eval_dataloader = 
eval_dataset\n", - " if isinstance(test_dataset, Dataset):\n", - " self.test_dataloader = DataLoader(\n", - " dataset=test_dataset, batch_size=batch_size, shuffle=True, \n", - " num_workers=cpu_count-1, pin_memory=True\n", - " )\n", - " else:\n", - " self.test_dataloader = test_dataset\n", - "\n", - " self.total_train_steps = self.num_epochs * len(self.train_dataloader)\n", - "\n", - " self.optimizer = torch.optim.AdamW(\n", - " model.parameters(), lr=learning_rate, \n", - " weight_decay=weight_decay, betas=(adam_beta1, adam_beta2)\n", - " )\n", - " self.criterion = nn.CrossEntropyLoss()\n", - "\n", - " def train(self):\n", - " train_loss_curve = []\n", - " eval_loss_curve = []\n", - " eval_acc_curve = []\n", - " step = 0\n", - " with tqdm(total=self.total_train_steps) as pbar:\n", - " for epoch in range(self.num_epochs):\n", - " total_train_loss = 0\n", - " for x, targets in self.train_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.long)\n", - "\n", - " self.optimizer.zero_grad()\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_train_loss += loss.item()\n", - " if (step + 1) % self.logging_steps == 0:\n", - " train_loss_curve.append((step + 1, loss.item()))\n", - " \n", - " loss.backward()\n", - " self.optimizer.step()\n", - " step += 1\n", - " pbar.update(1)\n", - "\n", - " if self.eval_steps > 0 and (step + 1) % self.eval_steps == 0:\n", - " avg_eval_loss, avg_eval_acc = self.eval()\n", - " eval_loss_curve.append((step + 1, avg_eval_loss))\n", - " eval_acc_curve.append((step + 1, avg_eval_acc))\n", - " eval_info = {\n", - " 'Epoch': f'{(step + 1) / len(self.train_dataloader):.1f}/{self.num_epochs}',\n", - " 'Total Valid Loss': f'{avg_eval_loss:.2f}',\n", - " 'Avg Valid Acc': f'{avg_eval_acc:.2%}'\n", - " }\n", - " if self.print_eval:\n", - " print(eval_info)\n", - " if self.print_log_epochs > 0 and (epoch + 1) % self.print_log_epochs == 0:\n", - " 
log_info = {\n", - " 'Epoch': f'{(step + 1) / len(self.train_dataloader):.1f}/{self.num_epochs}',\n", - " 'Total Train Loss': f'{total_train_loss:.2f}'\n", - " }\n", - " print(log_info)\n", - "\n", - " return_info = {}\n", - " if self.test_dataloader:\n", - " test_acc = self.test()\n", - " if self.print_test_result:\n", - " print('Avg Test Acc:', f'{test_acc:.2%}')\n", - " return_info['test_acc'] = test_acc\n", - " if self.plot:\n", - " self.plot_results(train_loss_curve, eval_loss_curve, eval_acc_curve)\n", - " return_info['curves'] = {\n", - " 'train_loss_curve': train_loss_curve,\n", - " 'eval_loss_curve': eval_loss_curve,\n", - " 'eval_acc_curve': eval_acc_curve\n", - " }\n", - " return return_info\n", - "\n", - " def eval(self):\n", - " total_eval_loss = 0\n", - " total_eval_acc = 0\n", - " total_eval_samples = 0\n", - " with torch.inference_mode():\n", - " for x, targets in self.eval_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.long)\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_eval_loss += loss.item()\n", - " preds = nn.functional.softmax(output, dim=1).argmax(dim=1)\n", - " total_eval_acc += (preds == targets).float().sum().item()\n", - " total_eval_samples += targets.numel()\n", - " avg_eval_loss = total_eval_loss / len(self.eval_dataloader)\n", - " avg_eval_acc = total_eval_acc / total_eval_samples\n", - " return avg_eval_loss, avg_eval_acc\n", - "\n", - " def test(self):\n", - " total_test_acc = 0\n", - " total_test_samples = 0\n", - " with torch.inference_mode():\n", - " for x, targets in self.test_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.long)\n", - " output = self.model(x)\n", - " preds = nn.functional.softmax(output, dim=1).argmax(dim=1)\n", - " total_test_acc += (preds == targets).float().sum().item()\n", - " total_test_samples += targets.numel()\n", 
- " avg_test_acc = total_test_acc / total_test_samples\n", - " return avg_test_acc\n", - " \n", - " def plot_results(self, train_loss_curve, eval_loss_curve, eval_acc_curve):\n", - " fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n", - "\n", - " train_log_steps, train_losses = zip(*train_loss_curve)\n", - " axes[0].plot(train_log_steps, train_losses, label='Training Loss', color='blue')\n", - " eval_log_steps, eval_losses = zip(*eval_loss_curve)\n", - " axes[0].plot(eval_log_steps, eval_losses, label='Validation Loss', color='orange')\n", - " axes[0].set_xlabel('Step')\n", - " axes[0].set_ylabel('Loss')\n", - " axes[0].set_title('Loss Curve')\n", - " axes[0].legend()\n", - " axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - " eval_log_steps, eval_accuracies = zip(*eval_acc_curve)\n", - " axes[1].plot(eval_log_steps, eval_accuracies, label='Validation Accuracy', color='green', marker='o')\n", - " axes[1].set_xlabel('Step')\n", - " axes[1].set_ylabel('Accuracy')\n", - " axes[1].set_title('Validation Accuracy Curve')\n", - " axes[1].legend()\n", - " axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - " \n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "87f4b8dd-5718-42b7-823d-6b8414551bc2", - "metadata": {}, - "source": [ - "## 1.1. 题目一\n", - "\n", - "**手写二维卷积的实现,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)(只用循环几轮即可)**" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "0e6166df-cb5a-43c4-9456-60041d30e717", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "df8196e0-cc68-465d-a05f-0325b376954b", - "metadata": {}, - "source": [ - "在传统的二维卷积中,卷积是通过一个滑动的卷积核进行计算的,这就意味着会有大量的`for`循环,会增加计算的时间复杂度。\n", - "\n", - "对于拥有良好矩阵运算性能的GPU来说,上面的计算可以进行优化,即:将卷积核转化为矩阵,原图像数据也裁剪成对应的矩阵,叠加起来,这样需要多层`for`循环的卷积运算就可以由一次矩阵运算完成。\n", - "\n", - "具体运算流程如下:\n", - "1. 将原图像进行`padding`操作;\n", - "2. 
使用`nn.functional.unfold()`将原图像矩阵重塑为`(batch_size, -1, in_channels, kernel_size, kernel_size)`,其中`-1`会被替代为每张图片裁剪成了多少块,等于传统二维卷积的卷积核循环滑动计算次数;\n", - "3. 将卷积核重塑为对应图片碎块的卷积核矩阵;\n", - "4. 将两者进行矩阵相乘,一次计算完毕,加上偏置`bias`;\n", - "5. 重塑相乘结果,转化为正确的输出矩阵。\n", - "\n", - "代码实现如下。" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "080d8193-bb86-42b2-a685-fe51b8f7519e", - "metadata": {}, - "outputs": [], - "source": [ - "class My_Conv2d(nn.Module):\n", - " def __init__(self, in_channels:int, out_channels:int, kernel_size:int, padding:int=0, bias=True):\n", - " super(My_Conv2d, self).__init__()\n", - " self.has_bias = bias\n", - " self.in_channels = in_channels\n", - " self.out_channels = out_channels\n", - " self.kernel_size = kernel_size\n", - " self.padding = padding\n", - " self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, kernel_size, kernel_size))\n", - " nn.init.xavier_uniform_(self.weight)\n", - " if self.has_bias:\n", - " self.bias = nn.Parameter(torch.zeros(out_channels, requires_grad=True, dtype=torch.float32))\n", - "\n", - " def forward(self, x):\n", - " batch_size, _, input_height, input_width = x.shape\n", - " if self.padding > 0:\n", - " x = nn.functional.pad(x, (self.padding, self.padding, self.padding, self.padding))\n", - " x = nn.functional.unfold(x, kernel_size=self.kernel_size)\n", - " x = x.permute(0, 2, 1).contiguous()\n", - " weight_unfold = self.weight.view(self.out_channels, -1).t()\n", - " x = torch.matmul(x, weight_unfold)\n", - " if self.has_bias:\n", - " x += self.bias\n", - " output_height = input_height + 2 * self.padding - self.kernel_size + 1\n", - " output_width = input_width + 2 * self.padding - self.kernel_size + 1\n", - " x = x.view(batch_size, output_height, output_width, self.out_channels).permute(0, 3, 1, 2).contiguous()\n", - " return x\n", - "\n", - "\n", - "class Model_1_1(nn.Module):\n", - " def __init__(self, image_size: int, num_classes=3):\n", - " super(Model_1_1, self).__init__()\n", - " self.net = 
nn.Sequential(collections.OrderedDict([\n", - " ('conv1', My_Conv2d(in_channels=3, out_channels=128, kernel_size=3, padding=1, bias=False)),\n", - " ('bn1', nn.BatchNorm2d(128)),\n", - " ('relu1', nn.ReLU(inplace=True)),\n", - " ('conv2', My_Conv2d(in_channels=128, out_channels=512, kernel_size=3, padding=1, bias=False)),\n", - " ('bn2', nn.BatchNorm2d(512)),\n", - " ('relu2', nn.ReLU(inplace=True)),\n", - " ('pool', nn.AvgPool2d(image_size)),\n", - " ('flatten', nn.Flatten()),\n", - " ('fc', nn.Linear(in_features=512, out_features=num_classes))\n", - " ]))\n", - "\n", - " def forward(self, x):\n", - " return self.net(x)" - ] - }, - { - "cell_type": "markdown", - "id": "b63eb5a9-dea0-4e04-870d-2b48e59cd721", - "metadata": {}, - "source": [ - "运行测试。" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ddceee2f-7084-4ae8-9c73-adb4b18b8f32", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9950ac2fadc44ba6b6f369265d354bc0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "training_args = {\n", - " 'train_dataset': train_vehicle_dataset,\n", - " 'eval_dataset': test_vehicle_dataset,\n", - " 'learning_rate': 2.0e-4,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.01,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 50,\n", - " 'print_log_epochs': 0\n", - "}\n", - "\n", - "model = Model_1_1(image_size=image_size, num_classes=num_classes).to(device)\n", - "trainer = MultiCLSTrainer(model=model, **training_args)\n", - "_ = trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "08c6d960-9d0e-4282-9c3f-b493140c14ae", - "metadata": {}, - "source": [ - "模型能够正常收敛并且达到$90\\%$以上的准确率。" - ] - }, - { - "cell_type": "markdown", - "id": "29a8a92f-d610-44c1-883a-64d253afd351", - "metadata": {}, - "source": [ - "## 题目二\n", - "\n", - 
"**使用torch.nn实现二维卷积,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)**" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "9b2e4cf3-297a-480a-a8c3-5b9a1a391d25", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "2f8a9d69-cacf-4481-b17e-970d6c4745e4", - "metadata": {}, - "source": [ - "使用上面定义的二维卷积进行车辆分类的训练和预测。\n", - "\n", - "同时,使用`nn.Conv2d`组建相同结构的模型,与手写二维卷积组建的模型进行比较。" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "ad4d1aac-32e3-456b-b905-1708c72fe989", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1113bcd152a0499b90f9530a09eb1836", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class Model_1_2(nn.Module):\n", - " def __init__(self, image_size: int, num_classes=3):\n", - " super(Model_1_2, self).__init__()\n", - " self.net = nn.Sequential(collections.OrderedDict([\n", - " ('conv1', nn.Conv2d(in_channels=3, out_channels=128, kernel_size=3, padding=1, bias=False)),\n", - " ('bn1', nn.BatchNorm2d(128)),\n", - " ('relu1', nn.ReLU(inplace=True)),\n", - " ('conv2', nn.Conv2d(in_channels=128, out_channels=512, kernel_size=3, padding=1, bias=False)),\n", - " ('bn2', nn.BatchNorm2d(512)),\n", - " ('relu2', nn.ReLU(inplace=True)),\n", - " ('pool', nn.AvgPool2d(image_size)),\n", - " ('flatten', nn.Flatten()),\n", - " ('fc', nn.Linear(in_features=512, out_features=num_classes))\n", - " ]))\n", - "\n", - " def forward(self, x):\n", - " return self.net(x)\n", - "\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_vehicle_dataset,\n", - " 'eval_dataset': test_vehicle_dataset,\n", - " 'learning_rate': 2.0e-4,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.1,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 50,\n", - " 
'print_log_epochs': 0\n", - "}\n", - "\n", - "model = Model_1_2(image_size=image_size, num_classes=num_classes).to(device)\n", - "trainer = MultiCLSTrainer(model=model, **training_args)\n", - "_ = trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "5fc9101e-75f7-4b13-9abf-913e2db4543f", - "metadata": {}, - "source": [ - "很显然,在车辆分类的任务上,手动实现的二维卷积和`nn.Conv2d`都能够完成任务,且准确率相差不大。\n", - "\n", - "但是`nn.Conv2d`的优化显然比手动实现的好,每个epoch的训练用时和显存占用情况都优于手动实现的二维卷积。" - ] - }, - { - "cell_type": "markdown", - "id": "c4d35fc5-2af1-40b2-a762-80fc88d2401e", - "metadata": {}, - "source": [ - "## 1.3. 题目三\n", - "\n", - "**不同超参数的对比分析(包括卷积层数、卷积核大小、batchsize、lr等)选其中至少1-2个进行分析**" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8d936883-9a7e-4f38-a361-87c44f2663c7", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "05937845-82a2-4261-9e8f-2da9d4f6e78d", - "metadata": {}, - "source": [ - "接下来从**卷积层数**进行对比分析。分别构造具有1、2、3、4个卷积层的模型,进行车辆分类任务的训练和预测。为控制变量,卷积层的输出统一为512个特征,变量为卷积层层数和各卷积层之间out_channels的大小分配。" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "583c3b5d-f807-4c51-9c6a-3212cf69ecec", - "metadata": {}, - "outputs": [], - "source": [ - "class Model_1_3(nn.Module):\n", - " def __init__(self, conv_config: list[tuple[int]], image_size: int, num_classes=3):\n", - " super(Model_1_3, self).__init__()\n", - " assert len(conv_config) >= 1\n", - " layers = collections.OrderedDict()\n", - " for i, (in_c, out_c, k, s, p, d) in enumerate(conv_config, start=1):\n", - " layers[f\"conv{i}\"] = nn.Conv2d(\n", - " in_channels=in_c, out_channels=out_c, kernel_size=k, \n", - " stride=s, padding=p, dilation=d, bias=False\n", - " )\n", - " layers[f\"bn{i}\"] = nn.BatchNorm2d(out_c)\n", - " layers[f\"relu{i}\"] = nn.ReLU(inplace=True)\n", - " layers[\"avgpool\"] = nn.AvgPool2d(image_size)\n", - " layers[\"flatten\"] = nn.Flatten()\n", - " layers[\"fc\"] = nn.Linear(in_features=512, 
out_features=num_classes)\n", - " self.net = nn.Sequential(layers)\n", - "\n", - " def forward(self, x):\n", - " return self.net(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "22e14159-fd98-43c3-8a54-3c68a63d7d51", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(1层卷积)开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9667fd6f7b7d4b77ac575b5d89ff6023", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "conv_configs = [\n", - " [(3, 512, 3, 1, 1, 1),],\n", - " [(3, 128, 3, 1, 1, 1), (128, 512, 3, 1, 1, 1),],\n", - " [(3, 64, 3, 1, 1, 1), (64, 256, 3, 1, 1, 1), (256, 512, 3, 1, 1, 1),],\n", - " [(3, 64, 3, 1, 1, 1), (64, 128, 3, 1, 1, 1), (128, 256, 3, 1, 1, 1), (256, 512, 3, 1, 1, 1),],\n", - "]\n", - "plot_colors = ['blue', 'green', 'orange', 'purple']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Step')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Step')\n", - "axes[1].set_ylabel('Accuracy')\n", - "axes[1].set_title('Validation Accuracy Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_vehicle_dataset,\n", - " 'eval_dataset': test_vehicle_dataset,\n", - " 'learning_rate': 2.0e-5,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.1,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 50,\n", - " 'plot': False,\n", - " 'print_log_epochs': 0,\n", - " 'print_eval': False\n", - "}\n", - "\n", - "for index, conv_config in enumerate(conv_configs):\n", - " model = Model_1_3(conv_config, image_size=image_size, 
num_classes=num_classes).to(device)\n", - " \n", - " print(f\"模型{index + 1}({len(conv_config)}层卷积)开始训练:\")\n", - " trainer = MultiCLSTrainer(model=model, **training_args)\n", - " curves = trainer.train()['curves']\n", - "\n", - " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", - " axes[0].plot(\n", - " eval_log_steps, eval_losses,\n", - " label=f\"conv layers={len(conv_config)}\", color=plot_colors[index]\n", - " )\n", - " eval_log_steps, eval_accuracies = zip(*curves['eval_acc_curve'])\n", - " axes[1].plot(\n", - " eval_log_steps, eval_accuracies, \n", - " label=f\"conv layers={len(conv_config)}\", color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "388f5b62-31b5-4b2f-be4b-145095fcd03f", - "metadata": {}, - "source": [ - "模型训练的显存占用、单个epoch的训练/测试时长都随着卷积层的数量增加而增加。\n", - "\n", - "从曲线上看,模型训练的稳定程度随着卷积层数量的增加而增加。\n", - "\n", - "当卷积层数量逐渐增加,正确率提高,说明模型的拟合能力也逐渐提高。" - ] - }, - { - "cell_type": "markdown", - "id": "dbd99d1b-9599-44e3-825a-47013608523a", - "metadata": {}, - "source": [ - "对**卷积核大小**进行比较分析。分别构造卷积核大小为3、5、7、9的模型,进行车辆识别任务的训练和预测。" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "4bca87f1-f637-4651-a4ed-6256fe1d4a26", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "6edf7de8-45ee-4735-93a1-dae1b711cf0f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(卷积核大小=3)开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e13bf785dfed43c58c6bb48c23618c40", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "conv_configs = [\n", - " [(3, 128, 3, 1, 1, 1), (128, 512, 3, 1, 1, 1),],\n", - " [(3, 
128, 5, 1, 2, 1), (128, 512, 5, 1, 2, 1),],\n", - " [(3, 128, 7, 1, 3, 1), (128, 512, 7, 1, 3, 1),],\n", - " [(3, 128, 9, 1, 4, 1), (128, 512, 9, 1, 4, 1),]\n", - "]\n", - "plot_colors = ['blue', 'green', 'orange', 'purple']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Step')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Step')\n", - "axes[1].set_ylabel('Accuracy')\n", - "axes[1].set_title('Validation Accuracy Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_vehicle_dataset,\n", - " 'eval_dataset': test_vehicle_dataset,\n", - " 'learning_rate': 1.0e-5,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.1,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 50,\n", - " 'plot': False,\n", - " 'print_log_epochs': 0,\n", - " 'print_eval': False\n", - "}\n", - "\n", - "for index, conv_config in enumerate(conv_configs):\n", - " model = Model_1_3(conv_config, image_size=image_size, num_classes=num_classes).to(device)\n", - " \n", - " print(f\"模型{index + 1}(卷积核大小={conv_config[0][2]})开始训练:\")\n", - " trainer = MultiCLSTrainer(model=model, **training_args)\n", - " curves = trainer.train()['curves']\n", - "\n", - " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", - " axes[0].plot(\n", - " eval_log_steps, eval_losses,\n", - " label=f\"kernel size={conv_config[0][2]}\", color=plot_colors[index]\n", - " )\n", - " eval_log_steps, eval_accuracies = zip(*curves['eval_acc_curve'])\n", - " axes[1].plot(\n", - " eval_log_steps, eval_accuracies, \n", - " label=f\"kernel size={conv_config[0][2]}\", color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", 
- "id": "cdff91d8-f1ac-4f1f-81d7-f2d8a82e8c80", - "metadata": {}, - "source": [ - "随着卷积核的增大,训练时长增加,显存占用也增加,性能也增加。这是由于卷积核增大,参数量增加的结果。" - ] - }, - { - "cell_type": "markdown", - "id": "2b049ba1-b908-4e9b-ac37-3db4abeb5df2", - "metadata": {}, - "source": [ - "## 1.4. 题目四\n", - "\n", - "**使用PyTorch实现经典模型AlexNet并在至少一个数据集进行试验分析**" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "f91714e7-a95c-496f-94a9-5a170e4df6c6", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "9b52cd44-23ef-43a7-9f19-b12193cd430b", - "metadata": {}, - "source": [ - "构建AlexNet网络。为匹配车辆识别数据集,输出维度为3。" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b9bd9b7d-ae8c-44e3-bb78-f332018b8c89", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f18a1422c7bd4516bad0cf07665e720b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class AlexNet(nn.Module):\n", - " def __init__(self):\n", - " super(AlexNet, self).__init__()\n", - " self.features = nn.Sequential(collections.OrderedDict([\n", - " ('conv1', nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0)), # 55 * 55\n", - " ('relu1', nn.ReLU(inplace=True)),\n", - " ('pool1', nn.MaxPool2d(kernel_size=3, stride=2)), # 27 * 27\n", - " ('conv2', nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)), # 27 * 27\n", - " ('relu2', nn.ReLU(inplace=True)),\n", - " ('pool2', nn.MaxPool2d(kernel_size=3, stride=2)), # 13 * 13\n", - " ('conv3', nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)), # 13 * 13\n", - " ('relu3', nn.ReLU(inplace=True)),\n", - " ('conv4', nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)), # 13 * 13\n", - " ('relu4', 
nn.ReLU(inplace=True)),\n", - " ('conv5', nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)), # 13 * 13\n", - " ('relu5', nn.ReLU(inplace=True)),\n", - " ('pool5', nn.MaxPool2d(kernel_size=3, stride=2)), # 6 * 6\n", - " ]))\n", - " self.classifier = nn.Sequential(collections.OrderedDict([\n", - " ('fc6', nn.Linear(in_features=9216, out_features=4096)),\n", - " ('relu6', nn.ReLU(inplace=True)),\n", - " ('dropout6', nn.Dropout(p=0.5)),\n", - " ('fc7', nn.Linear(in_features=4096, out_features=4096)),\n", - " ('relu7', nn.ReLU(inplace=True)),\n", - " ('dropout7', nn.Dropout(p=0.5)),\n", - " ('fc8', nn.Linear(in_features=4096, out_features=3)),\n", - " ]))\n", - "\n", - " def forward(self, x):\n", - " x = self.features(x)\n", - " x = torch.flatten(x, 1)\n", - " x = self.classifier(x)\n", - " return x\n", - "\n", - "\n", - "alexnet_transform = transforms.Compose(\n", - " [\n", - " transforms.ToTensor(),\n", - " transforms.Resize((227, 227), antialias=True),\n", - " transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),\n", - " ]\n", - ")\n", - "train_alexnet_dataset = Vehicle(root=\"./dataset\", train=True, transform=alexnet_transform)\n", - "test_alexnet_dataset = Vehicle(root=\"./dataset\", train=False, transform=alexnet_transform)\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_alexnet_dataset,\n", - " 'eval_dataset': test_alexnet_dataset,\n", - " 'learning_rate': 1.0e-5,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 5.0e-5,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 50,\n", - " 'print_log_epochs': 0\n", - "}\n", - "\n", - "model = AlexNet().to(device)\n", - "trainer = MultiCLSTrainer(model=model, **training_args)\n", - "_ = trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "4f6d3679-fd02-4b5b-9f06-5dcd0e53b301", - "metadata": {}, - "source": [ - "实验表明,AlexNet在车辆识别数据集上能够正常收敛,且准确率达到了$90\\%$以上。但是由于模型复杂度较高,训练到最后存在过拟合问题。可以通过增加dropout层缓解这个问题。" - ] - }, - { - 
"attachments": {}, - "cell_type": "markdown", - "id": "5ac6541c-c367-4fc1-a6b7-1cf2c3cc0062", - "metadata": {}, - "source": [ - "# 2. 空洞卷积实验\n", - "\n", - "- 使用torch.nn实现空洞卷积,要求dilation满足HDC条件(如1,2,5)且要堆叠多层并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)\n", - "- 将空洞卷积模型的实验结果与卷积模型的结果进行分析比对,训练时间、预测精度、Loss变化等角度分析\n", - "- 不同超参数的对比分析(包括卷积层数、卷积核大小、不同dilation的选择,batchsize、lr等)选其中至少1-2个进行分析(选做)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "c5111a37-674f-4468-a8cb-4a267814e645", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "8aaefc6f-3866-4bf2-b070-b2a03f3eeaa6", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, - "source": [ - "对**dilation**进行比较分析。分别构造dilation为\n", - "- \\[\\[1, 1, 1\\], \\[1, 1, 1\\]\\](普通卷积)\n", - "- \\[\\[1, 2, 5\\], \\[1, 2, 5\\]\\]\n", - "- \\[\\[1, 3, 5\\], \\[1, 3, 5\\]\\]\n", - "- \\[\\[1, 3, 7\\], \\[1, 3, 7\\]\\]\n", - "\n", - "的模型,进行车辆分类任务的训练和预测。为控制变量,`channels`的变化统一为\\[3, 16, 32, 64, 128, 256, 512\\]。" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "b31ad185-f854-4ae4-97dc-3044ab157288", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(dilation=[1, 1, 1])开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "67473f0f22bb4aadb3634aa5ed099f5a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "conv_configs = [\n", - " [(3, 16, 3, 1, 1, 1), (16, 32, 3, 1, 1, 1), (32, 64, 3, 1, 1, 1), \n", - " (64, 128, 3, 1, 1, 1), (128, 256, 3, 1, 1, 1), (256, 512, 3, 1, 1, 1),],\n", - " [(3, 16, 3, 1, 1, 1), (16, 32, 3, 1, 2, 2), (32, 64, 3, 1, 5, 5), \n", - " (64, 128, 3, 1, 1, 1), (128, 256, 3, 1, 2, 2), (256, 512, 3, 1, 5, 5),],\n", - " [(3, 16, 3, 1, 1, 1), (16, 32, 
3, 1, 3, 3), (32, 64, 3, 1, 5, 5), \n", - " (64, 128, 3, 1, 1, 1), (128, 256, 3, 1, 3, 3), (256, 512, 3, 1, 5, 5),],\n", - " [(3, 16, 3, 1, 1, 1), (16, 32, 3, 1, 3, 3), (32, 64, 3, 1, 7, 7), \n", - " (64, 128, 3, 1, 1, 1), (128, 256, 3, 1, 3, 3), (256, 512, 3, 1, 7, 7),],\n", - "]\n", - "plot_colors = ['blue', 'green', 'orange', 'purple']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Step')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Step')\n", - "axes[1].set_ylabel('Accuracy')\n", - "axes[1].set_title('Validation Accuracy Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_vehicle_dataset,\n", - " 'eval_dataset': test_vehicle_dataset,\n", - " 'learning_rate': 5.0e-6,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.1,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 20,\n", - " 'plot': False,\n", - " 'print_log_epochs': 0,\n", - " 'print_eval': False\n", - "}\n", - "\n", - "for index, conv_config in enumerate(conv_configs):\n", - " model = Model_1_3(conv_config, image_size=image_size, num_classes=num_classes).to(device)\n", - " dilation_str = f'dilation=[{conv_config[0][5]}, {conv_config[1][5]}, {conv_config[2][5]}]'\n", - " \n", - " print(f\"模型{index + 1}({dilation_str})开始训练:\")\n", - " trainer = MultiCLSTrainer(model=model, **training_args)\n", - " curves = trainer.train()['curves']\n", - "\n", - " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", - " axes[0].plot(\n", - " eval_log_steps, eval_losses,\n", - " label=dilation_str, color=plot_colors[index]\n", - " )\n", - " eval_log_steps, eval_accuracies = zip(*curves['eval_acc_curve'])\n", - " axes[1].plot(\n", - " eval_log_steps, eval_accuracies, \n", - " label=dilation_str, 
color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "e02fc389-bdf4-467f-8e14-511a66255602", - "metadata": {}, - "source": [ - "从loss曲线和准确率曲线来看,4种dilation配置最终都能够收敛到较好的结果(准确率$90\\%$左右),导致的原因是模型复杂度高而训练数据量少。\n", - "\n", - "但是从收敛速度来看,准确率曲线左端,dilation为$[1, 3, 7]$的模型在第一次验证时就已经达到约$70\\%$的准确率,相比之下普通卷积的准确率还不到$50\\%$,其他两种配置的准确率介于两者之间;验证集loss同理。\n", - "\n", - "说明dilation跨度越大的模型,收敛越快,空洞卷积模型有更好的拟合能力。\n", - "\n", - "从训练速度来看,dilation的模型训练时长显著多于普通卷积,猜测是改变图形形状适配空洞卷积核的步骤会耗费较多的算力。" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "995aecd0-3490-44f5-8733-7c626e368e6d", - "metadata": {}, - "source": [ - "# 3. 残差网络实验\n", - "\n", - "- 实现给定结构的残差网络,在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "97f25712-792e-4428-9856-290419980557", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "ae6a09f9-f568-4f2f-bba4-f247106c367a", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9833117f476b460aa3b2d5d051464f1c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/500 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class BasicResidualBlock(nn.Module):\n", - " def __init__(self, in_channels, out_channels, stride=1):\n", - " super(BasicResidualBlock, self).__init__()\n", - " self.conv1 = nn.Sequential(\n", - " nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),\n", - " nn.BatchNorm2d(out_channels),\n", - " nn.ReLU(inplace=True)\n", - " )\n", - " self.conv2 = nn.Sequential(\n", - " nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),\n", - " nn.BatchNorm2d(out_channels),\n", - " 
)\n", - " self.relu = nn.ReLU(inplace=True)\n", - " self.shortcut = nn.Sequential()\n", - " if stride != 1 or in_channels != out_channels:\n", - " self.shortcut = nn.Sequential(\n", - " nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),\n", - " nn.BatchNorm2d(out_channels)\n", - " )\n", - "\n", - " def forward(self, x):\n", - " return self.relu(self.conv2(self.conv1(x)) + self.shortcut(x))\n", - "\n", - "\n", - "class ResNet(nn.Module):\n", - " def __init__(self, num_classes=3):\n", - " super(ResNet, self).__init__()\n", - " self.features = nn.Sequential(collections.OrderedDict([\n", - " ('conv1', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, bias=False)),\n", - " ('bn1', nn.BatchNorm2d(64)),\n", - " ('relu1', nn.ReLU(inplace=True)),\n", - " ('resnet_block2', BasicResidualBlock(in_channels=64, out_channels=64)),\n", - " ('resnet_block3', BasicResidualBlock(in_channels=64, out_channels=64)),\n", - " ('resnet_block4', BasicResidualBlock(in_channels=64, out_channels=128, stride=2)),\n", - " ('resnet_block5', BasicResidualBlock(in_channels=128, out_channels=128)),\n", - " ('resnet_block6', BasicResidualBlock(in_channels=128, out_channels=256, stride=2)),\n", - " ('resnet_block7', BasicResidualBlock(in_channels=256, out_channels=256)),\n", - " ('resnet_block8', BasicResidualBlock(in_channels=256, out_channels=512, stride=2)),\n", - " ('resnet_block9', BasicResidualBlock(in_channels=512, out_channels=512)),\n", - " ('pool', nn.AvgPool2d(4)),\n", - " ]))\n", - " self.classifier = nn.Linear(in_features=512, out_features=num_classes)\n", - "\n", - " def forward(self, x):\n", - " x = self.features(x)\n", - " x = self.classifier(torch.flatten(x, 1))\n", - " return x\n", - "\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_vehicle_dataset,\n", - " 'eval_dataset': test_vehicle_dataset,\n", - " 'learning_rate': 5.0e-6,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.1,\n", - " 
'logging_steps': 3,\n", - " 'eval_steps': 50,\n", - " 'print_log_epochs': 0\n", - "}\n", - "model = ResNet(num_classes=num_classes).to(device)\n", - "trainer = MultiCLSTrainer(model=model, **training_args)\n", - "_ = trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "82793282-b7c0-49e4-9292-f5f58738d0e6", - "metadata": {}, - "source": [ - "实验证明,残差网络的效果比纯卷积网络好。原因在于残差网络能保留原图片输入的大部分特征,不会在卷积计算中遗漏。\n", - "\n", - "但是由于网络复杂度比较高,模型出现了过拟合,可以加入dropout缓解。" - ] - }, - { - "cell_type": "markdown", - "id": "dbd67b91-d8b8-4ffc-882a-3d47ee3b9d87", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "# 心得体会\n", - "\n", - "通过本次卷积神经网络实验,我深入理解和掌握了卷积神经网络的原理,并且在多个数据集上设计并训练了不同结构的卷积神经网络模型,通过比较分析不同模型的性能,加深了我对卷积神经网络中不同组件作用的理解。\n", - "\n", - "实验中,我实现了自定义的二维卷积运算,并在车辆分类任务上与PyTorch内置的二维卷积运算进行了比较。这让我深入理解了卷积运算转换为矩阵运算的过程,以及GPU对矩阵运算的加速优化。我也对比研究了卷积层数、卷积核大小等超参数对模型性能的影响。\n", - "\n", - "通过对dilation参数的研究,我意识到在神经网络设计中参数选择的细微差别可能导致显著的性能变化。特别是在对比普通卷积和不同dilation配置的空洞卷积时,我观察到虽然较高的dilation在一开始的学习效率和准确率方面可能不如普通卷积,但随着训练的进行,它们能更好地拟合数据且较不易过拟合。这种洞见对我理解如何平衡网络的学习速度和泛化能力非常有帮助。\n", - "\n", - "此外,通过残差网络的实验,我学到了网络结构设计的重要性。残差网络能够有效地解决深度网络训练过程中的梯度消失问题,同时保留更多的原始特征信息。这一点在实验中得到了明显的体现,残差网络在几乎所有指标上都优于传统的纯卷积网络。\n", - "\n", - "在实验过程中,我也遇到了一些挑战,比如调整网络参数以避免过拟合,以及理解不同网络结构背后的理论基础。通过不断尝试和阅读相关文献,我逐步克服了这些难题,并对这些概念有了更深刻的理解。\n", - "\n", - "通过整个实验,我掌握了卷积神经网络的组成结构,了解了调节不同超参数对模型性能的影响,加深了对卷积神经网络代表性结构的理解,为后续课程项目奠定了基础。我会在今后的学习中进一步深化对卷积神经网络的研究,运用到更多实际问题中。这次实验不仅增强了我的技术技能,也加深了我对深度学习领域的热情和认识。我期待未来能在这一领域继续探索和成长。" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Lab4/.ipynb_checkpoints/循环神经网络实验-checkpoint.ipynb 
b/Lab4/.ipynb_checkpoints/循环神经网络实验-checkpoint.ipynb deleted file mode 100644 index 30b19f8..0000000 --- a/Lab4/.ipynb_checkpoints/循环神经网络实验-checkpoint.ipynb +++ /dev/null @@ -1,1350 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "11a0e575-4d40-4889-ba1b-e522ed3c6c61", - "metadata": {}, - "source": [ - "

研究生《深度学习》课程
实验报告

\n", - "
\n", - "
课程名称:深度学习 M502019B
\n", - "
实验题目:循环神经网络实验
\n", - "
学号:25120323
\n", - "
姓名:柯劲帆
\n", - "
授课老师:原继东
\n", - "
报告日期:2025年8月27日
\n", - "
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "24298f69-4181-4d19-a5b5-324c73b572ed", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Pytorch version: 2.7.1+cu118\n", - "CUDA version: 11.8\n", - "CUDA device count: 1\n", - "CUDA device name: NVIDIA TITAN Xp\n", - "CUDA device capability: (6, 1)\n", - "CUDA device memory: 11.90 GB\n", - "CPU count: 8\n" - ] - } - ], - "source": [ - "import os\n", - "import numpy as np\n", - "import torch\n", - "from torch.autograd import Variable\n", - "from torch.utils.data import Dataset, DataLoader, Subset, random_split\n", - "from torch import nn\n", - "from torchvision import datasets, transforms\n", - "from PIL import Image\n", - "from multiprocessing import cpu_count\n", - "import matplotlib.pyplot as plt\n", - "from tqdm.notebook import tqdm\n", - "import pandas as pd\n", - "import collections\n", - "from typing import Literal, Union, Optional, List\n", - "\n", - "print('Pytorch version:',torch.__version__)\n", - "if not torch.cuda.is_available():\n", - " print('CUDA is_available:', torch.cuda.is_available())\n", - "else:\n", - " print('CUDA version:', torch.version.cuda)\n", - " print('CUDA device count:', torch.cuda.device_count())\n", - " print('CUDA device name:', torch.cuda.get_device_name())\n", - " print('CUDA device capability:', torch.cuda.get_device_capability())\n", - " print('CUDA device memory:', f'{torch.cuda.get_device_properties(0).total_memory/1024/1024/1024:.2f}', 'GB')\n", - "print('CPU count:', cpu_count())\n", - "\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - "seed = 42\n", - "np.random.seed(seed)\n", - "torch.manual_seed(seed)\n", - "torch.cuda.manual_seed(seed)\n", - "cpu_count = cpu_count()" - ] - }, - { - "cell_type": "markdown", - "id": "39399543-2bcb-49d3-a7cd-601b69e5083a", - "metadata": {}, - "source": [ - "# 1. 
\n", - "\n", - "**手动实现循环神经网络RNN,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)**" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "84193537-6555-4b67-8bd5-5e0dc83a635b", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "01f7e57a-ad71-4ebd-ab90-6d1f0ee35ad9", - "metadata": {}, - "source": [ - "构建数据集。" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "81cf14e0-3202-425c-8c34-c4699c893f7d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "训练集第1个样本输入:tensor([[0.4480, 0.1673, 0.3450],\n", - " [0.5020, 0.1792, 0.3330],\n", - " [0.5060, 0.1808, 0.3340],\n", - " [0.4760, 0.1613, 0.3400],\n", - " [0.4710, 0.1740, 0.3460],\n", - " [0.4880, 0.1926, 0.3310],\n", - " [0.4550, 0.2177, 0.3180],\n", - " [0.4830, 0.1996, 0.3140],\n", - " [0.4730, 0.1897, 0.3230],\n", - " [0.4600, 0.1704, 0.3240],\n", - " [0.5380, 0.1502, 0.3600],\n", - " [0.5450, 0.1470, 0.3810],\n", - " [0.4950, 0.1628, 0.3670],\n", - " [0.4600, 0.1744, 0.3450],\n", - " [0.4730, 0.1739, 0.3470],\n", - " [0.4780, 0.1833, 0.3320],\n", - " [0.4490, 0.1705, 0.3360],\n", - " [0.5150, 0.1865, 0.3330],\n", - " [0.4570, 0.1795, 0.3280],\n", - " [0.5160, 0.1755, 0.3410],\n", - " [0.4870, 0.1775, 0.3370],\n", - " [0.4340, 0.1677, 0.3280],\n", - " [0.4820, 0.1656, 0.3320],\n", - " [0.5080, 0.1626, 0.3420],\n", - " [0.4810, 0.1546, 0.3630],\n", - " [0.5070, 0.1399, 0.3870],\n", - " [0.5200, 0.1369, 0.3980],\n", - " [0.4300, 0.0868, 0.4370],\n", - " [0.5030, 0.0869, 0.5010],\n", - " [0.5280, 0.0906, 0.5300],\n", - " [0.5230, 0.0869, 0.5550],\n", - " [0.4540, 0.0741, 0.5720]])\n", - "训练集第1个样本标签:tensor([0.5000])\n" - ] - } - ], - "source": [ - "class TrafficDataset(Dataset):\n", - " def __init__(self, inputs, targets):\n", - " self.inputs = torch.Tensor(inputs)\n", - " # print(self.inputs.shape)\n", - " self.targets = torch.Tensor(targets).unsqueeze(1)\n", - " 
# print(self.targets.shape)\n", - "\n", - " def __getitem__(self, index):\n", - " return self.inputs[index], self.targets[index]\n", - "\n", - " def __len__(self):\n", - " return self.targets.shape[0]\n", - "\n", - "\n", - "def make_traffic_datasets(\n", - " file_path: str, sensor: int = 10, target: int = 0, \n", - " train_por = 0.6, test_por = 0.2, window_size = 32, label_col = 0\n", - "):\n", - " raw_data = np.load(file_path)['data']\n", - " scaled_data = raw_data * np.array([1.0e-3, 1.0, 1.0e-2])\n", - " sensor_data = scaled_data[:, sensor, :]\n", - "\n", - " window_inputs = np.stack([\n", - " sensor_data[i : i + window_size] \n", - " for i in range(len(sensor_data) - window_size - 1)\n", - " ], axis=0)\n", - " labels = sensor_data[window_size:, label_col]\n", - "\n", - " shuffle_idx = np.arange(len(window_inputs))\n", - " np.random.shuffle(shuffle_idx)\n", - " window_inputs = window_inputs[shuffle_idx]\n", - " labels = labels[shuffle_idx]\n", - "\n", - " len_train = int(len(labels) * train_por)\n", - " len_test = int(len(labels) * test_por)\n", - " len_valid = len(labels) - len_train - len_test\n", - "\n", - " train_dataset = TrafficDataset(inputs=window_inputs[:len_train, :], targets=labels[:len_train])\n", - " valid_dataset = TrafficDataset(inputs=window_inputs[len_train:len_train+len_valid, :], targets=labels[len_train:len_train+len_valid])\n", - " test_dataset = TrafficDataset(inputs=window_inputs[len_train+len_valid:, :], targets=labels[len_train+len_valid:])\n", - "\n", - " return train_dataset, valid_dataset, test_dataset\n", - "\n", - "\n", - "train_dataset, valid_dataset, test_dataset = make_traffic_datasets('./dataset/traffic-flow/raw/PEMS04.npz')\n", - "x, y = train_dataset[0]\n", - "print(f\"训练集第1个样本输入:{x}\")\n", - "print(f\"训练集第1个样本标签:{y}\")" - ] - }, - { - "cell_type": "markdown", - "id": "07177026-fb83-4f9e-b462-bc7f503f3040", - "metadata": {}, - "source": [ - "构建序列回归任务的Trainer。" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": 
"13e71bcd-3005-4bd1-aed5-73624e3c8f15", - "metadata": {}, - "outputs": [], - "source": [ - "class Trainer():\n", - " def __init__(\n", - " self,\n", - " model,\n", - " train_dataset: Union[Dataset, DataLoader],\n", - " eval_dataset: Union[Dataset, DataLoader],\n", - " learning_rate: float,\n", - " num_epochs: int,\n", - " batch_size: int,\n", - " weight_decay: float = 0.0,\n", - " adam_beta1: float = 0.9,\n", - " adam_beta2: float = 0.999,\n", - " test_dataset: Union[Dataset, DataLoader] = None,\n", - " plot: bool = True, \n", - " print_test_result: bool = True,\n", - " logging_steps: int = 1,\n", - " eval_steps: int = 1,\n", - " print_log_epochs: int = 1,\n", - " print_eval: bool = True\n", - " ):\n", - " self.model = model\n", - " self.learning_rate = learning_rate\n", - " self.num_epochs = num_epochs\n", - " self.batch_size = batch_size\n", - " self.plot = plot\n", - " self.print_test_result = print_test_result\n", - " self.logging_steps = logging_steps\n", - " self.eval_steps = eval_steps\n", - " self.print_log_epochs = print_log_epochs\n", - " self.print_eval = print_eval\n", - " \n", - " if isinstance(train_dataset, Dataset):\n", - " self.train_dataloader = DataLoader(\n", - " dataset=train_dataset, batch_size=batch_size, shuffle=True, \n", - " num_workers=cpu_count-1, pin_memory=True\n", - " )\n", - " else:\n", - " self.train_dataloader = train_dataset\n", - " if isinstance(eval_dataset, Dataset):\n", - " self.eval_dataloader = DataLoader(\n", - " dataset=eval_dataset, batch_size=batch_size, shuffle=True, \n", - " num_workers=cpu_count-1, pin_memory=True\n", - " )\n", - " else:\n", - " self.eval_dataloader = eval_dataset\n", - " if isinstance(test_dataset, Dataset):\n", - " self.test_dataloader = DataLoader(\n", - " dataset=test_dataset, batch_size=batch_size, shuffle=True, \n", - " num_workers=cpu_count-1, pin_memory=True\n", - " )\n", - " else:\n", - " self.test_dataloader = test_dataset\n", - "\n", - " self.total_train_steps = self.num_epochs * 
len(self.train_dataloader)\n", - "\n", - " self.optimizer = torch.optim.AdamW(\n", - " model.parameters(), lr=learning_rate, \n", - " weight_decay=weight_decay, betas=(adam_beta1, adam_beta2)\n", - " )\n", - " self.criterion = nn.MSELoss()\n", - "\n", - " def train(self):\n", - " train_loss_curve = []\n", - " eval_loss_curve = []\n", - " eval_error_curve = []\n", - " step = 0\n", - " with tqdm(total=self.total_train_steps) as pbar:\n", - " for epoch in range(self.num_epochs):\n", - " total_train_loss = 0\n", - " for x, targets in self.train_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.float32)\n", - "\n", - " self.optimizer.zero_grad()\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_train_loss += loss.item()\n", - " if (step + 1) % self.logging_steps == 0:\n", - " train_loss_curve.append((step + 1, loss.item()))\n", - " \n", - " loss.backward()\n", - " self.optimizer.step()\n", - " step += 1\n", - " pbar.update(1)\n", - "\n", - " if self.eval_steps > 0 and (step + 1) % self.eval_steps == 0:\n", - " avg_eval_loss, avg_eval_error = self.eval()\n", - " eval_loss_curve.append((step + 1, avg_eval_loss))\n", - " eval_error_curve.append((step + 1, avg_eval_error))\n", - " eval_info = {\n", - " 'Epoch': f'{(step + 1) / len(self.train_dataloader):.1f}/{self.num_epochs}',\n", - " 'Total Valid Loss': f'{avg_eval_loss:.2f}',\n", - " 'Avg Valid Error': f'{avg_eval_error:.2%}'\n", - " }\n", - " if self.print_eval:\n", - " print(eval_info)\n", - " if self.print_log_epochs > 0 and (epoch + 1) % self.print_log_epochs == 0:\n", - " log_info = {\n", - " 'Epoch': f'{(step + 1) / len(self.train_dataloader):.1f}/{self.num_epochs}',\n", - " 'Total Train Loss': f'{total_train_loss:.2f}'\n", - " }\n", - " print(log_info)\n", - "\n", - " return_info = {}\n", - " if self.test_dataloader:\n", - " test_error = self.test()\n", - " if self.print_test_result:\n", - " print('Avg 
Test Error:', f'{test_error:.2%}')\n", - " return_info['test_error'] = test_error\n", - " if self.plot:\n", - " self.plot_results(train_loss_curve, eval_loss_curve, eval_error_curve)\n", - " return_info['curves'] = {\n", - " 'train_loss_curve': train_loss_curve,\n", - " 'eval_loss_curve': eval_loss_curve,\n", - " 'eval_error_curve': eval_error_curve\n", - " }\n", - " return return_info\n", - "\n", - " def eval(self):\n", - " total_eval_loss = 0\n", - " total_eval_error = 0\n", - " total_eval_samples = 0\n", - " with torch.inference_mode():\n", - " for x, targets in self.eval_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.float32)\n", - " output = self.model(x)\n", - " loss = self.criterion(output, targets)\n", - " total_eval_loss += loss.item()\n", - " total_eval_error += torch.square(output - targets).sum().item()\n", - " total_eval_samples += targets.numel()\n", - " avg_eval_loss = total_eval_loss / len(self.eval_dataloader)\n", - " avg_eval_error = total_eval_error / total_eval_samples\n", - " return avg_eval_loss, avg_eval_error\n", - "\n", - " def test(self):\n", - " total_test_error = 0\n", - " total_test_samples = 0\n", - " with torch.inference_mode():\n", - " for x, targets in self.test_dataloader:\n", - " x = x.to(device=device, dtype=torch.float32)\n", - " targets = targets.to(device=device, dtype=torch.float32)\n", - " output = self.model(x)\n", - " total_test_error += torch.square(output - targets).sum().item()\n", - " total_test_samples += targets.numel()\n", - " avg_test_error = total_test_error / total_test_samples\n", - " return avg_test_error\n", - " \n", - " def plot_results(self, train_loss_curve, eval_loss_curve, eval_error_curve):\n", - " fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n", - "\n", - " train_log_steps, train_losses = zip(*train_loss_curve)\n", - " axes[0].plot(train_log_steps, train_losses, label='Training Loss', color='blue')\n", - " eval_log_steps, 
eval_losses = zip(*eval_loss_curve)\n", - " axes[0].plot(eval_log_steps, eval_losses, label='Validation Loss', color='orange')\n", - " axes[0].set_xlabel('Step')\n", - " axes[0].set_ylabel('Loss')\n", - " axes[0].set_title('Loss Curve')\n", - " axes[0].legend()\n", - " axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - " eval_log_steps, eval_error = zip(*eval_error_curve)\n", - " axes[1].plot(eval_log_steps, eval_error, label='Validation Error', color='red', marker='o')\n", - " axes[1].set_xlabel('Step')\n", - " axes[1].set_ylabel('Error')\n", - " axes[1].set_title('Validation Error Curve')\n", - " axes[1].legend()\n", - " axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - " \n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "d6d244e8-aa25-4217-a87f-9bed08df6e3d", - "metadata": {}, - "source": [ - "构建模型。" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4c6e0589-4212-4ed7-8627-5c7fa729c225", - "metadata": {}, - "outputs": [], - "source": [ - "class My_RNN(nn.Module):\n", - " def __init__(self, input_size, hidden_size, output_size):\n", - " super().__init__()\n", - " self.hidden_size = hidden_size\n", - " \n", - " self.w_h = nn.Parameter(torch.rand(input_size, hidden_size))\n", - " self.u_h = nn.Parameter(torch.rand(hidden_size, hidden_size))\n", - " self.b_h = nn.Parameter(torch.zeros(hidden_size))\n", - " \n", - " self.w_y = nn.Parameter(torch.rand(hidden_size, output_size))\n", - " self.b_y = nn.Parameter(torch.zeros(output_size))\n", - " \n", - " self.tanh = nn.Tanh()\n", - " self.leaky_relu = nn.LeakyReLU()\n", - " \n", - " for param in self.parameters():\n", - " if param.dim() > 1:\n", - " nn.init.xavier_uniform_(param)\n", - " \n", - " def forward(self, x):\n", - " batch_size = x.size(0)\n", - " seq_len = x.size(1)\n", - " \n", - " h = torch.zeros(batch_size, self.hidden_size).to(x.device)\n", - " y_list = []\n", - " for i in range(seq_len):\n", - " h = 
self.tanh(\n", - " torch.matmul(x[:, i, :], self.w_h) + \n", - " torch.matmul(h, self.u_h) + self.b_h\n", - " ) # (batch_size, hidden_size)\n", - " y = self.leaky_relu(torch.matmul(h, self.w_y) + self.b_y) # (batch_size, output_size)\n", - " y_list.append(y)\n", - " return torch.stack(y_list, dim=1), h\n", - " \n", - "\n", - "class Model_1(nn.Module):\n", - " def __init__(self, input_size, hidden_size, output_size):\n", - " super(Model_1, self).__init__()\n", - " self.rnn = My_RNN(input_size, hidden_size, hidden_size).to(device)\n", - " self.relu = nn.LeakyReLU()\n", - " self.fc = nn.Linear(hidden_size, output_size)\n", - "\n", - " def forward(self, x):\n", - " x, _ = self.rnn(x)\n", - " out = self.fc(self.relu(x[:, -1, :]))\n", - " return out" - ] - }, - { - "cell_type": "markdown", - "id": "08695360-e270-4e4a-a796-de1b0d9a79cb", - "metadata": {}, - "source": [ - "训练。" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f623d77b-546e-4334-9207-2c97438ca2ae", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a77ab4e55bcc448690736b38e02a097d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4000 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "training_args = {\n", - " 'train_dataset': train_dataset,\n", - " 'eval_dataset': valid_dataset,\n", - " 'test_dataset': test_dataset,\n", - " 'learning_rate': 1.0e-6,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.0,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 500,\n", - " 'print_log_epochs': 0\n", - "}\n", - "\n", - "model = Model_1(input_size=3, hidden_size=512, output_size=1).to(device)\n", - "trainer = Trainer(model=model, **training_args)\n", - "_ = trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "c2517ddf-5d7d-4b1d-9b0d-fd3b7199e3b4", - "metadata": {}, - "source": [ - "模型能够正常收敛。最终测试集上,预测值与真实值的误差不超过$0.5\\%$。" - ] - 
}, - { - "cell_type": "markdown", - "id": "4628114e-5cdd-41fc-92a0-d41f17407ae7", - "metadata": {}, - "source": [ - "# 2. \n", - "\n", - "**使用torch.nn.rnn实现循环神经网络,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)**" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "969bc5e9-3e4f-4ea8-8d26-89541b13e893", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "3cced084-7df7-461f-ac72-d446330c3986", - "metadata": {}, - "source": [ - "使用`torch.nn.RNN`替换手动实现的RNN网络模块构建新模型,并进行训练。" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "325edb62-ac49-4d36-9885-d606ce3393b6", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ff338ac095a243ad98f581f60352e06c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4000 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class Model_2(nn.Module):\n", - " def __init__(self, input_size, hidden_size, output_size):\n", - " super(Model_2, self).__init__()\n", - " self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)\n", - " self.relu = nn.LeakyReLU()\n", - " self.fc = nn.Linear(hidden_size, output_size)\n", - "\n", - " def forward(self, x):\n", - " x, _ = self.rnn(x)\n", - " out = self.fc(self.relu(x[:, -1, :]))\n", - " return out\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_dataset,\n", - " 'eval_dataset': valid_dataset,\n", - " 'test_dataset': test_dataset,\n", - " 'learning_rate': 1.0e-6,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.0,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 500,\n", - " 'print_log_epochs': 0\n", - "}\n", - "\n", - "model = Model_2(input_size=3, hidden_size=512, output_size=1).to(device)\n", - "trainer = Trainer(model=model, **training_args)\n", - "_ = trainer.train()" - ] - }, 
- { - "cell_type": "markdown", - "id": "a8f4d2a8-c980-4a6a-9dae-ee39024ba0ef", - "metadata": {}, - "source": [ - "最终模型效果相当,最终测试集上,`torch.nn.RNN`实现的模型,预测值与真实值的误差不超过$0.5\\%$。" - ] - }, - { - "cell_type": "markdown", - "id": "6b5230e2-c707-4779-88d7-a1c31d7b1bdc", - "metadata": {}, - "source": [ - "# 3.\n", - "\n", - "**不同超参数的对比分析(包括hidden_size、batchsize、lr等)选其中至少1-2个进行分析**" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "8fd5de2a-98e1-4f0d-a522-3315205dcade", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "markdown", - "id": "e377d82e-f28e-4ed8-9244-b02f3db72a36", - "metadata": {}, - "source": [ - "选择`hidden_size`进行分析。" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "37d3cc2d-7374-4944-bb10-407383c7f120", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "模型1(隐藏维度=128)开始训练:\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4caffaf30e0a40c3a88dad4bc80f51b4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4000 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "hidden_sizes = [128, 256, 512, 1024]\n", - "plot_colors = ['blue', 'green', 'orange', 'purple']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Step')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Step')\n", - "axes[1].set_ylabel('Error')\n", - "axes[1].set_title('Validation Error Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_dataset,\n", - " 'eval_dataset': valid_dataset,\n", - " 'test_dataset': test_dataset,\n", - " 'learning_rate': 1.0e-6,\n", - " 'num_epochs': 100,\n", - " 
'batch_size': 256,\n", - " 'weight_decay': 0.0,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 500,\n", - " 'plot': False,\n", - " 'print_log_epochs': 0,\n", - " 'print_eval': False\n", - "}\n", - "\n", - "for index, hidden_size in enumerate(hidden_sizes):\n", - " model = Model_2(input_size=3, hidden_size=hidden_size, output_size=1).to(device)\n", - " \n", - " print(f\"模型{index + 1}(隐藏维度={hidden_size})开始训练:\")\n", - " trainer = Trainer(model=model, **training_args)\n", - " curves = trainer.train()['curves']\n", - "\n", - " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", - " axes[0].plot(\n", - " eval_log_steps, eval_losses,\n", - " label=f\"hidden size={hidden_size}\", color=plot_colors[index]\n", - " )\n", - " eval_log_steps, eval_errors = zip(*curves['eval_error_curve'])\n", - " axes[1].plot(\n", - " eval_log_steps, eval_errors, \n", - " label=f\"hidden size={hidden_size}\", color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "27d099af-822b-4176-90d0-992ec4b57685", - "metadata": {}, - "source": [ - "从收敛过程和测试集结果来看,hidden_size越大,收敛越快,测试结果更优。" - ] - }, - { - "cell_type": "markdown", - "id": "1a8edf64-f507-4b70-b571-411caf4f5884", - "metadata": {}, - "source": [ - "# 3.\n", - "\n", - "**使用PyTorch实现LSTM和GRU并在至少一个数据集进行试验分析**" - ] - }, - { - "cell_type": "markdown", - "id": "4304e3fa-dd89-4f3e-bc83-024e8f37b1f2", - "metadata": {}, - "source": [ - "## 3.1. 实现LSTM" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "671baab1-927c-4941-ab4e-32b25c93c2de", - "metadata": {}, - "outputs": [], - "source": [ - "torch.cuda.empty_cache()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "5ffdfb73-83c5-4511-9fba-076282dfa1b8", - "metadata": {}, - "outputs": [], - "source": [ - "class My_LSTM(nn. 
Module):\n", - " def __init__(self, input_size, hidden_size):\n", - " super().__init__()\n", - " self.hidden_size = hidden_size\n", - " self.gates = nn.Linear(input_size + hidden_size, hidden_size * 4)\n", - " self.sigmoid = nn.Sigmoid()\n", - " self.tanh = nn. Tanh()\n", - " for param in self.parameters():\n", - " if param.dim() > 1:\n", - " nn.init.xavier_uniform_(param)\n", - "\n", - " def forward(self, x):\n", - " batch_size = x.size(0)\n", - " seq_len = x.size(1)\n", - " h, c = (torch.zeros(batch_size, self.hidden_size).to(x.device) for _ in range(2))\n", - " y_list = []\n", - " for i in range(seq_len):\n", - " forget_gate, input_gate, output_gate, candidate_cell = \\\n", - " self.gates(torch.cat([x[:, i, :], h], dim=-1)).chunk(4, -1)\n", - " forget_gate, input_gate, output_gate = (\n", - " self.sigmoid(g) for g in (forget_gate, input_gate, output_gate)\n", - " )\n", - " c = forget_gate * c + input_gate * self.tanh(candidate_cell)\n", - " h = output_gate * self.tanh(c)\n", - " y_list.append(h)\n", - " return torch.stack(y_list, dim=1), (h, c)" - ] - }, - { - "cell_type": "markdown", - "id": "03bf9cd0-d40b-4f5d-a61e-7a71a925d808", - "metadata": {}, - "source": [ - "训练。" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d3943744-72df-48db-a035-c3e76ba68127", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b53f30fc7da24cd9913297e01db708b4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/4000 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class Model_3(nn.Module):\n", - " def __init__(self, input_size, hidden_size, output_size):\n", - " super(Model_3, self).__init__()\n", - " self.rnn = My_LSTM(input_size=input_size, hidden_size=hidden_size)\n", - " self.relu = nn.LeakyReLU()\n", - " self.fc = nn.Linear(hidden_size, output_size)\n", - "\n", - " def forward(self, x):\n", - " x, _ = self.rnn(x)\n", - " out = 
class My_GRU(nn.Module):
    """Single-layer, batch-first GRU built from plain linear layers.

    At every time step the current input and the previous hidden state are
    concatenated and projected to the update and reset gates by one fused
    linear layer; a second linear layer produces the candidate state from the
    input and the reset-scaled hidden state.

    Args:
        input_size: Number of features of each time step of the input.
        hidden_size: Number of features of the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        # Fused projection: the first half of the output is the update gate,
        # the second half the reset gate (see the chunk(2, -1) in forward).
        self.gates = nn.Linear(input_size + hidden_size, hidden_size * 2)
        self.hidden_transform = nn.Linear(input_size + hidden_size, hidden_size)

        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

        # Xavier-initialise weight matrices only; 1-D biases keep the
        # nn.Linear default initialisation.
        for param in self.parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)

    def forward(self, x, h0=None):
        """Run the recurrence over the whole sequence.

        Args:
            x: Tensor indexed as (batch, seq_len, input_size).
            h0: Optional initial hidden state of shape (batch, hidden_size).
                Defaults to zeros created with the dtype and device of ``x``.

        Returns:
            A tuple ``(outputs, h)``: ``outputs`` stacks the hidden state of
            every step along dim 1, giving (batch, seq_len, hidden_size), and
            ``h`` is the hidden state after the last step.

        Raises:
            ValueError: If ``h0`` is given with a shape other than
                (batch, hidden_size).
        """
        batch_size, seq_len = x.size(0), x.size(1)

        if h0 is None:
            # new_zeros allocates directly on x's device with x's dtype,
            # instead of building a default-dtype CPU tensor and copying it.
            h = x.new_zeros(batch_size, self.hidden_size)
        else:
            expected_shape = (batch_size, self.hidden_size)
            if tuple(h0.shape) != expected_shape:
                raise ValueError(
                    f"h0 must have shape {expected_shape}, got {tuple(h0.shape)}"
                )
            h = h0

        if seq_len == 0:
            # torch.stack rejects an empty list; return an empty output
            # sequence and the untouched initial state instead of crashing.
            return x.new_zeros(batch_size, 0, self.hidden_size), h

        outputs = []
        for t in range(seq_len):
            x_t = x[:, t, :]
            update_gate, reset_gate = self.sigmoid(
                self.gates(torch.cat([x_t, h], dim=-1))
            ).chunk(2, -1)
            candidate_hidden = self.tanh(
                self.hidden_transform(torch.cat([x_t, reset_gate * h], dim=-1))
            )
            # The update gate weights the candidate; (1 - gate) keeps the old state.
            h = (1 - update_gate) * h + update_gate * candidate_hidden
            outputs.append(h)
        return torch.stack(outputs, dim=1), h


class Model_4(nn.Module):
    """Sequence regressor: My_GRU -> LeakyReLU -> linear head on the last step.

    Args:
        input_size: Number of features of each time step of the input.
        hidden_size: Hidden size of the recurrent layer.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(Model_4, self).__init__()
        self.rnn = My_GRU(input_size=input_size, hidden_size=hidden_size)
        self.relu = nn.LeakyReLU()
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, output_size)."""
        x, _ = self.rnn(x)
        # Only the hidden state of the final time step feeds the head.
        out = self.fc(self.relu(x[:, -1, :]))
        return out
class _LastStepRegressor(nn.Module):
    """Shared body of Model_5 / Model_6: recurrent layer -> LeakyReLU -> linear.

    The two public models were copy-pasted and differed only in the recurrent
    layer class, so the common structure lives here once.

    Args:
        rnn_cls: Recurrent layer class to instantiate (nn.LSTM or nn.GRU).
        input_size: Number of features of each time step of the input.
        hidden_size: Hidden size of the recurrent layer.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, rnn_cls, input_size, hidden_size, output_size):
        super().__init__()
        # Sub-modules are created in the original order (rnn, relu, fc) so that
        # seeded parameter initialisation and state_dict keys are unchanged.
        self.rnn = rnn_cls(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.relu = nn.LeakyReLU()
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, output_size)."""
        x, _ = self.rnn(x)
        # Only the output of the final time step feeds the head.
        out = self.fc(self.relu(x[:, -1, :]))
        return out


class Model_5(_LastStepRegressor):
    """Sequence regressor backed by the built-in single-layer nn.LSTM."""

    def __init__(self, input_size, hidden_size, output_size):
        super(Model_5, self).__init__(nn.LSTM, input_size, hidden_size, output_size)


class Model_6(_LastStepRegressor):
    """Sequence regressor backed by the built-in single-layer nn.GRU."""

    def __init__(self, input_size, hidden_size, output_size):
        super(Model_6, self).__init__(nn.GRU, input_size, hidden_size, output_size)
['green', 'orange']\n", - "\n", - "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", - "\n", - "axes[0].set_xlabel('Step')\n", - "axes[0].set_ylabel('Loss')\n", - "axes[0].set_title('Validation Loss Curve')\n", - "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "axes[1].set_xlabel('Step')\n", - "axes[1].set_ylabel('Error')\n", - "axes[1].set_title('Validation Error Curve')\n", - "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", - "\n", - "training_args = {\n", - " 'train_dataset': train_dataset,\n", - " 'eval_dataset': valid_dataset,\n", - " 'test_dataset': test_dataset,\n", - " 'learning_rate': 1.0e-5,\n", - " 'num_epochs': 100,\n", - " 'batch_size': 256,\n", - " 'weight_decay': 0.0,\n", - " 'logging_steps': 3,\n", - " 'eval_steps': 500,\n", - " 'plot': False,\n", - " 'print_log_epochs': 0,\n", - " 'print_eval': False\n", - "}\n", - "\n", - "for index, model_arch in enumerate(model_archs):\n", - " model = (\n", - " Model_5(input_size=3, hidden_size=512, output_size=1)\n", - " if model_arch == \"LSTM\" else\n", - " Model_6(input_size=3, hidden_size=512, output_size=1)\n", - " ).to(device)\n", - " \n", - " print(f\"模型{index + 1}(模型架构={model_arch})开始训练:\")\n", - " trainer = Trainer(model=model, **training_args)\n", - " curves = trainer.train()['curves']\n", - "\n", - " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", - " axes[0].plot(\n", - " eval_log_steps, eval_losses,\n", - " label=f\"model arch={model_arch}\", color=plot_colors[index]\n", - " )\n", - " eval_log_steps, eval_errors = zip(*curves['eval_error_curve'])\n", - " axes[1].plot(\n", - " eval_log_steps, eval_errors, \n", - " label=f\"model arch={model_arch}\", color=plot_colors[index]\n", - " )\n", - "\n", - "axes[0].legend()\n", - "axes[1].legend()\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "6290b2f4-6346-44d3-b79f-7ed78279425a", - "metadata": {}, - "source": [ - "收敛曲线和测试集实验结果都表明,GRU比LSTM能力更优,且运行速度更快。" 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}