{ "cells": [ { "cell_type": "markdown", "id": "11a0e575-4d40-4889-ba1b-e522ed3c6c61", "metadata": {}, "source": [ "

研究生《深度学习》课程
实验报告

\n", "
\n", "
课程名称:深度学习 M502019B
\n", "
实验题目:循环神经网络实验
\n", "
学号:25120323
\n", "
姓名:柯劲帆
\n", "
授课老师:原继东
\n", "
报告日期:2025年8月27日
\n", "
" ] }, { "cell_type": "code", "execution_count": 1, "id": "24298f69-4181-4d19-a5b5-324c73b572ed", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pytorch version: 2.7.1+cu118\n", "CUDA version: 11.8\n", "CUDA device count: 1\n", "CUDA device name: NVIDIA TITAN Xp\n", "CUDA device capability: (6, 1)\n", "CUDA device memory: 11.90 GB\n", "CPU count: 8\n" ] } ], "source": [ "import os\n", "import numpy as np\n", "import torch\n", "from torch.autograd import Variable\n", "from torch.utils.data import Dataset, DataLoader, Subset, random_split\n", "from torch import nn\n", "from torchvision import datasets, transforms\n", "from PIL import Image\n", "from multiprocessing import cpu_count\n", "import matplotlib.pyplot as plt\n", "from tqdm.notebook import tqdm\n", "import pandas as pd\n", "import collections\n", "from typing import Literal, Union, Optional, List\n", "\n", "print('Pytorch version:',torch.__version__)\n", "if not torch.cuda.is_available():\n", " print('CUDA is_available:', torch.cuda.is_available())\n", "else:\n", " print('CUDA version:', torch.version.cuda)\n", " print('CUDA device count:', torch.cuda.device_count())\n", " print('CUDA device name:', torch.cuda.get_device_name())\n", " print('CUDA device capability:', torch.cuda.get_device_capability())\n", " print('CUDA device memory:', f'{torch.cuda.get_device_properties(0).total_memory/1024/1024/1024:.2f}', 'GB')\n", "print('CPU count:', cpu_count())\n", "\n", "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "seed = 42\n", "np.random.seed(seed)\n", "torch.manual_seed(seed)\n", "torch.cuda.manual_seed(seed)\n", "cpu_count = cpu_count()" ] }, { "cell_type": "markdown", "id": "39399543-2bcb-49d3-a7cd-601b69e5083a", "metadata": {}, "source": [ "# 1. 
\n", "\n", "**手动实现循环神经网络RNN,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)**" ] }, { "cell_type": "code", "execution_count": 2, "id": "84193537-6555-4b67-8bd5-5e0dc83a635b", "metadata": {}, "outputs": [], "source": [ "torch.cuda.empty_cache()" ] }, { "cell_type": "markdown", "id": "01f7e57a-ad71-4ebd-ab90-6d1f0ee35ad9", "metadata": {}, "source": [ "构建数据集。" ] }, { "cell_type": "code", "execution_count": 3, "id": "81cf14e0-3202-425c-8c34-c4699c893f7d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "训练集第1个样本输入:tensor([[0.4480, 0.1673, 0.3450],\n", " [0.5020, 0.1792, 0.3330],\n", " [0.5060, 0.1808, 0.3340],\n", " [0.4760, 0.1613, 0.3400],\n", " [0.4710, 0.1740, 0.3460],\n", " [0.4880, 0.1926, 0.3310],\n", " [0.4550, 0.2177, 0.3180],\n", " [0.4830, 0.1996, 0.3140],\n", " [0.4730, 0.1897, 0.3230],\n", " [0.4600, 0.1704, 0.3240],\n", " [0.5380, 0.1502, 0.3600],\n", " [0.5450, 0.1470, 0.3810],\n", " [0.4950, 0.1628, 0.3670],\n", " [0.4600, 0.1744, 0.3450],\n", " [0.4730, 0.1739, 0.3470],\n", " [0.4780, 0.1833, 0.3320],\n", " [0.4490, 0.1705, 0.3360],\n", " [0.5150, 0.1865, 0.3330],\n", " [0.4570, 0.1795, 0.3280],\n", " [0.5160, 0.1755, 0.3410],\n", " [0.4870, 0.1775, 0.3370],\n", " [0.4340, 0.1677, 0.3280],\n", " [0.4820, 0.1656, 0.3320],\n", " [0.5080, 0.1626, 0.3420],\n", " [0.4810, 0.1546, 0.3630],\n", " [0.5070, 0.1399, 0.3870],\n", " [0.5200, 0.1369, 0.3980],\n", " [0.4300, 0.0868, 0.4370],\n", " [0.5030, 0.0869, 0.5010],\n", " [0.5280, 0.0906, 0.5300],\n", " [0.5230, 0.0869, 0.5550],\n", " [0.4540, 0.0741, 0.5720]])\n", "训练集第1个样本标签:tensor([0.5000])\n" ] } ], "source": [ "class TrafficDataset(Dataset):\n", " def __init__(self, inputs, targets):\n", " self.inputs = torch.Tensor(inputs)\n", " # print(self.inputs.shape)\n", " self.targets = torch.Tensor(targets).unsqueeze(1)\n", " # print(self.targets.shape)\n", "\n", " def __getitem__(self, index):\n", " return self.inputs[index], self.targets[index]\n", "\n", " def 
__len__(self):\n", " return self.targets.shape[0]\n", "\n", "\n", "def make_traffic_datasets(\n", " file_path: str, sensor: int = 10, target: int = 0, \n", " train_por = 0.6, test_por = 0.2, window_size = 32, label_col = 0\n", "):\n", " raw_data = np.load(file_path)['data']\n", " scaled_data = raw_data * np.array([1.0e-3, 1.0, 1.0e-2])\n", " sensor_data = scaled_data[:, sensor, :]\n", "\n", " window_inputs = np.stack([\n", " sensor_data[i : i + window_size] \n", " for i in range(len(sensor_data) - window_size - 1)\n", " ], axis=0)\n", " labels = sensor_data[window_size:, label_col]\n", "\n", " shuffle_idx = np.arange(len(window_inputs))\n", " np.random.shuffle(shuffle_idx)\n", " window_inputs = window_inputs[shuffle_idx]\n", " labels = labels[shuffle_idx]\n", "\n", " len_train = int(len(labels) * train_por)\n", " len_test = int(len(labels) * test_por)\n", " len_valid = len(labels) - len_train - len_test\n", "\n", " train_dataset = TrafficDataset(inputs=window_inputs[:len_train, :], targets=labels[:len_train])\n", " valid_dataset = TrafficDataset(inputs=window_inputs[len_train:len_train+len_valid, :], targets=labels[len_train:len_train+len_valid])\n", " test_dataset = TrafficDataset(inputs=window_inputs[len_train+len_valid:, :], targets=labels[len_train+len_valid:])\n", "\n", " return train_dataset, valid_dataset, test_dataset\n", "\n", "\n", "train_dataset, valid_dataset, test_dataset = make_traffic_datasets('./dataset/traffic-flow/raw/PEMS04.npz')\n", "x, y = train_dataset[0]\n", "print(f\"训练集第1个样本输入:{x}\")\n", "print(f\"训练集第1个样本标签:{y}\")" ] }, { "cell_type": "markdown", "id": "07177026-fb83-4f9e-b462-bc7f503f3040", "metadata": {}, "source": [ "构建序列回归任务的Trainer。" ] }, { "cell_type": "code", "execution_count": 4, "id": "13e71bcd-3005-4bd1-aed5-73624e3c8f15", "metadata": {}, "outputs": [], "source": [ "class Trainer():\n", " def __init__(\n", " self,\n", " model,\n", " train_dataset: Union[Dataset, DataLoader],\n", " eval_dataset: Union[Dataset, DataLoader],\n", " 
learning_rate: float,\n", " num_epochs: int,\n", " batch_size: int,\n", " weight_decay: float = 0.0,\n", " adam_beta1: float = 0.9,\n", " adam_beta2: float = 0.999,\n", " test_dataset: Union[Dataset, DataLoader] = None,\n", " plot: bool = True, \n", " print_test_result: bool = True,\n", " logging_steps: int = 1,\n", " eval_steps: int = 1,\n", " print_log_epochs: int = 1,\n", " print_eval: bool = True\n", " ):\n", " self.model = model\n", " self.learning_rate = learning_rate\n", " self.num_epochs = num_epochs\n", " self.batch_size = batch_size\n", " self.plot = plot\n", " self.print_test_result = print_test_result\n", " self.logging_steps = logging_steps\n", " self.eval_steps = eval_steps\n", " self.print_log_epochs = print_log_epochs\n", " self.print_eval = print_eval\n", " \n", " if isinstance(train_dataset, Dataset):\n", " self.train_dataloader = DataLoader(\n", " dataset=train_dataset, batch_size=batch_size, shuffle=True, \n", " num_workers=cpu_count-1, pin_memory=True\n", " )\n", " else:\n", " self.train_dataloader = train_dataset\n", " if isinstance(eval_dataset, Dataset):\n", " self.eval_dataloader = DataLoader(\n", " dataset=eval_dataset, batch_size=batch_size, shuffle=True, \n", " num_workers=cpu_count-1, pin_memory=True\n", " )\n", " else:\n", " self.eval_dataloader = eval_dataset\n", " if isinstance(test_dataset, Dataset):\n", " self.test_dataloader = DataLoader(\n", " dataset=test_dataset, batch_size=batch_size, shuffle=True, \n", " num_workers=cpu_count-1, pin_memory=True\n", " )\n", " else:\n", " self.test_dataloader = test_dataset\n", "\n", " self.total_train_steps = self.num_epochs * len(self.train_dataloader)\n", "\n", " self.optimizer = torch.optim.AdamW(\n", " model.parameters(), lr=learning_rate, \n", " weight_decay=weight_decay, betas=(adam_beta1, adam_beta2)\n", " )\n", " self.criterion = nn.MSELoss()\n", "\n", " def train(self):\n", " train_loss_curve = []\n", " eval_loss_curve = []\n", " eval_error_curve = []\n", " step = 0\n", " with 
tqdm(total=self.total_train_steps) as pbar:\n", " for epoch in range(self.num_epochs):\n", " total_train_loss = 0\n", " for x, targets in self.train_dataloader:\n", " x = x.to(device=device, dtype=torch.float32)\n", " targets = targets.to(device=device, dtype=torch.float32)\n", "\n", " self.optimizer.zero_grad()\n", " output = self.model(x)\n", " loss = self.criterion(output, targets)\n", " total_train_loss += loss.item()\n", " if (step + 1) % self.logging_steps == 0:\n", " train_loss_curve.append((step + 1, loss.item()))\n", " \n", " loss.backward()\n", " self.optimizer.step()\n", " step += 1\n", " pbar.update(1)\n", "\n", " if self.eval_steps > 0 and (step + 1) % self.eval_steps == 0:\n", " avg_eval_loss, avg_eval_error = self.eval()\n", " eval_loss_curve.append((step + 1, avg_eval_loss))\n", " eval_error_curve.append((step + 1, avg_eval_error))\n", " eval_info = {\n", " 'Epoch': f'{(step + 1) / len(self.train_dataloader):.1f}/{self.num_epochs}',\n", " 'Total Valid Loss': f'{avg_eval_loss:.2f}',\n", " 'Avg Valid Error': f'{avg_eval_error:.2%}'\n", " }\n", " if self.print_eval:\n", " print(eval_info)\n", " if self.print_log_epochs > 0 and (epoch + 1) % self.print_log_epochs == 0:\n", " log_info = {\n", " 'Epoch': f'{(step + 1) / len(self.train_dataloader):.1f}/{self.num_epochs}',\n", " 'Total Train Loss': f'{total_train_loss:.2f}'\n", " }\n", " print(log_info)\n", "\n", " return_info = {}\n", " if self.test_dataloader:\n", " test_error = self.test()\n", " if self.print_test_result:\n", " print('Avg Test Error:', f'{test_error:.2%}')\n", " return_info['test_error'] = test_error\n", " if self.plot:\n", " self.plot_results(train_loss_curve, eval_loss_curve, eval_error_curve)\n", " return_info['curves'] = {\n", " 'train_loss_curve': train_loss_curve,\n", " 'eval_loss_curve': eval_loss_curve,\n", " 'eval_error_curve': eval_error_curve\n", " }\n", " return return_info\n", "\n", " def eval(self):\n", " total_eval_loss = 0\n", " total_eval_error = 0\n", " 
total_eval_samples = 0\n", " with torch.inference_mode():\n", " for x, targets in self.eval_dataloader:\n", " x = x.to(device=device, dtype=torch.float32)\n", " targets = targets.to(device=device, dtype=torch.float32)\n", " output = self.model(x)\n", " loss = self.criterion(output, targets)\n", " total_eval_loss += loss.item()\n", " total_eval_error += torch.square(output - targets).sum().item()\n", " total_eval_samples += targets.numel()\n", " avg_eval_loss = total_eval_loss / len(self.eval_dataloader)\n", " avg_eval_error = total_eval_error / total_eval_samples\n", " return avg_eval_loss, avg_eval_error\n", "\n", " def test(self):\n", " total_test_error = 0\n", " total_test_samples = 0\n", " with torch.inference_mode():\n", " for x, targets in self.test_dataloader:\n", " x = x.to(device=device, dtype=torch.float32)\n", " targets = targets.to(device=device, dtype=torch.float32)\n", " output = self.model(x)\n", " total_test_error += torch.square(output - targets).sum().item()\n", " total_test_samples += targets.numel()\n", " avg_test_error = total_test_error / total_test_samples\n", " return avg_test_error\n", " \n", " def plot_results(self, train_loss_curve, eval_loss_curve, eval_error_curve):\n", " fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n", "\n", " train_log_steps, train_losses = zip(*train_loss_curve)\n", " axes[0].plot(train_log_steps, train_losses, label='Training Loss', color='blue')\n", " eval_log_steps, eval_losses = zip(*eval_loss_curve)\n", " axes[0].plot(eval_log_steps, eval_losses, label='Validation Loss', color='orange')\n", " axes[0].set_xlabel('Step')\n", " axes[0].set_ylabel('Loss')\n", " axes[0].set_title('Loss Curve')\n", " axes[0].legend()\n", " axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", "\n", " eval_log_steps, eval_error = zip(*eval_error_curve)\n", " axes[1].plot(eval_log_steps, eval_error, label='Validation Error', color='red', marker='o')\n", " axes[1].set_xlabel('Step')\n", " axes[1].set_ylabel('Error')\n", " 
axes[1].set_title('Validation Error Curve')\n", " axes[1].legend()\n", " axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", " \n", " plt.tight_layout()\n", " plt.show()" ] }, { "cell_type": "markdown", "id": "d6d244e8-aa25-4217-a87f-9bed08df6e3d", "metadata": {}, "source": [ "构建模型。" ] }, { "cell_type": "code", "execution_count": 5, "id": "4c6e0589-4212-4ed7-8627-5c7fa729c225", "metadata": {}, "outputs": [], "source": [ "class My_RNN(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super().__init__()\n", " self.hidden_size = hidden_size\n", " \n", " self.w_h = nn.Parameter(torch.rand(input_size, hidden_size))\n", " self.u_h = nn.Parameter(torch.rand(hidden_size, hidden_size))\n", " self.b_h = nn.Parameter(torch.zeros(hidden_size))\n", " \n", " self.w_y = nn.Parameter(torch.rand(hidden_size, output_size))\n", " self.b_y = nn.Parameter(torch.zeros(output_size))\n", " \n", " self.tanh = nn.Tanh()\n", " self.leaky_relu = nn.LeakyReLU()\n", " \n", " for param in self.parameters():\n", " if param.dim() > 1:\n", " nn.init.xavier_uniform_(param)\n", " \n", " def forward(self, x):\n", " batch_size = x.size(0)\n", " seq_len = x.size(1)\n", " \n", " h = torch.zeros(batch_size, self.hidden_size).to(x.device)\n", " y_list = []\n", " for i in range(seq_len):\n", " h = self.tanh(\n", " torch.matmul(x[:, i, :], self.w_h) + \n", " torch.matmul(h, self.u_h) + self.b_h\n", " ) # (batch_size, hidden_size)\n", " y = self.leaky_relu(torch.matmul(h, self.w_y) + self.b_y) # (batch_size, output_size)\n", " y_list.append(y)\n", " return torch.stack(y_list, dim=1), h\n", " \n", "\n", "class Model_1(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super(Model_1, self).__init__()\n", " self.rnn = My_RNN(input_size, hidden_size, hidden_size).to(device)\n", " self.relu = nn.LeakyReLU()\n", " self.fc = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, x):\n", " x, _ = self.rnn(x)\n", " out = 
self.fc(self.relu(x[:, -1, :]))\n", " return out" ] }, { "cell_type": "markdown", "id": "08695360-e270-4e4a-a796-de1b0d9a79cb", "metadata": {}, "source": [ "训练。" ] }, { "cell_type": "code", "execution_count": 6, "id": "f623d77b-546e-4334-9207-2c97438ca2ae", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a77ab4e55bcc448690736b38e02a097d", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4000 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "training_args = {\n", " 'train_dataset': train_dataset,\n", " 'eval_dataset': valid_dataset,\n", " 'test_dataset': test_dataset,\n", " 'learning_rate': 1.0e-6,\n", " 'num_epochs': 100,\n", " 'batch_size': 256,\n", " 'weight_decay': 0.0,\n", " 'logging_steps': 3,\n", " 'eval_steps': 500,\n", " 'print_log_epochs': 0\n", "}\n", "\n", "model = Model_1(input_size=3, hidden_size=512, output_size=1).to(device)\n", "trainer = Trainer(model=model, **training_args)\n", "_ = trainer.train()" ] }, { "cell_type": "markdown", "id": "c2517ddf-5d7d-4b1d-9b0d-fd3b7199e3b4", "metadata": {}, "source": [ "模型能够正常收敛。最终测试集上,预测值与真实值的误差不超过$0.5\\%$。" ] }, { "cell_type": "markdown", "id": "4628114e-5cdd-41fc-92a0-d41f17407ae7", "metadata": {}, "source": [ "# 2. 
\n", "\n", "**使用torch.nn.rnn实现循环神经网络,并在至少一个数据集上进行实验,从训练时间、预测精度、Loss变化等角度分析实验结果(最好使用图表展示)**" ] }, { "cell_type": "code", "execution_count": 7, "id": "969bc5e9-3e4f-4ea8-8d26-89541b13e893", "metadata": {}, "outputs": [], "source": [ "torch.cuda.empty_cache()" ] }, { "cell_type": "markdown", "id": "3cced084-7df7-461f-ac72-d446330c3986", "metadata": {}, "source": [ "使用`torch.nn.RNN`替换手动实现的RNN网络模块构建新模型,并进行训练。" ] }, { "cell_type": "code", "execution_count": 8, "id": "325edb62-ac49-4d36-9885-d606ce3393b6", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ff338ac095a243ad98f581f60352e06c", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4000 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "class Model_2(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super(Model_2, self).__init__()\n", " self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)\n", " self.relu = nn.LeakyReLU()\n", " self.fc = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, x):\n", " x, _ = self.rnn(x)\n", " out = self.fc(self.relu(x[:, -1, :]))\n", " return out\n", "\n", "training_args = {\n", " 'train_dataset': train_dataset,\n", " 'eval_dataset': valid_dataset,\n", " 'test_dataset': test_dataset,\n", " 'learning_rate': 1.0e-6,\n", " 'num_epochs': 100,\n", " 'batch_size': 256,\n", " 'weight_decay': 0.0,\n", " 'logging_steps': 3,\n", " 'eval_steps': 500,\n", " 'print_log_epochs': 0\n", "}\n", "\n", "model = Model_2(input_size=3, hidden_size=512, output_size=1).to(device)\n", "trainer = Trainer(model=model, **training_args)\n", "_ = trainer.train()" ] }, { "cell_type": "markdown", "id": "a8f4d2a8-c980-4a6a-9dae-ee39024ba0ef", "metadata": {}, "source": [ "最终模型效果相当,最终测试集上,`torch.nn.RNN`实现的模型,预测值与真实值的误差不超过$0.5\\%$。" ] }, { "cell_type": "markdown", "id": "6b5230e2-c707-4779-88d7-a1c31d7b1bdc", "metadata": {}, "source": [ "# 
3.\n", "\n", "**不同超参数的对比分析(包括hidden_size、batchsize、lr等)选其中至少1-2个进行分析**" ] }, { "cell_type": "code", "execution_count": 9, "id": "8fd5de2a-98e1-4f0d-a522-3315205dcade", "metadata": {}, "outputs": [], "source": [ "torch.cuda.empty_cache()" ] }, { "cell_type": "markdown", "id": "e377d82e-f28e-4ed8-9244-b02f3db72a36", "metadata": {}, "source": [ "选择`hidden_size`进行分析。" ] }, { "cell_type": "code", "execution_count": 10, "id": "37d3cc2d-7374-4944-bb10-407383c7f120", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "模型1(隐藏维度=128)开始训练:\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4caffaf30e0a40c3a88dad4bc80f51b4", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4000 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "hidden_sizes = [128, 256, 512, 1024]\n", "plot_colors = ['blue', 'green', 'orange', 'purple']\n", "\n", "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", "\n", "axes[0].set_xlabel('Step')\n", "axes[0].set_ylabel('Loss')\n", "axes[0].set_title('Validation Loss Curve')\n", "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", "axes[1].set_xlabel('Step')\n", "axes[1].set_ylabel('Error')\n", "axes[1].set_title('Validation Error Curve')\n", "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", "\n", "training_args = {\n", " 'train_dataset': train_dataset,\n", " 'eval_dataset': valid_dataset,\n", " 'test_dataset': test_dataset,\n", " 'learning_rate': 1.0e-6,\n", " 'num_epochs': 100,\n", " 'batch_size': 256,\n", " 'weight_decay': 0.0,\n", " 'logging_steps': 3,\n", " 'eval_steps': 500,\n", " 'plot': False,\n", " 'print_log_epochs': 0,\n", " 'print_eval': False\n", "}\n", "\n", "for index, hidden_size in enumerate(hidden_sizes):\n", " model = Model_2(input_size=3, hidden_size=hidden_size, output_size=1).to(device)\n", " \n", " print(f\"模型{index + 1}(隐藏维度={hidden_size})开始训练:\")\n", " trainer = Trainer(model=model, 
**training_args)\n", " curves = trainer.train()['curves']\n", "\n", " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", " axes[0].plot(\n", " eval_log_steps, eval_losses,\n", " label=f\"hidden size={hidden_size}\", color=plot_colors[index]\n", " )\n", " eval_log_steps, eval_errors = zip(*curves['eval_error_curve'])\n", " axes[1].plot(\n", " eval_log_steps, eval_errors, \n", " label=f\"hidden size={hidden_size}\", color=plot_colors[index]\n", " )\n", "\n", "axes[0].legend()\n", "axes[1].legend()\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "27d099af-822b-4176-90d0-992ec4b57685", "metadata": {}, "source": [ "从收敛过程和测试集结果来看,hidden_size越大,收敛越快,测试结果更优。" ] }, { "cell_type": "markdown", "id": "1a8edf64-f507-4b70-b571-411caf4f5884", "metadata": {}, "source": [ "# 4.\n", "\n", "**使用PyTorch实现LSTM和GRU并在至少一个数据集进行试验分析**" ] }, { "cell_type": "markdown", "id": "4304e3fa-dd89-4f3e-bc83-024e8f37b1f2", "metadata": {}, "source": [ "## 4.1. 实现LSTM" ] }, { "cell_type": "code", "execution_count": 11, "id": "671baab1-927c-4941-ab4e-32b25c93c2de", "metadata": {}, "outputs": [], "source": [ "torch.cuda.empty_cache()" ] }, { "cell_type": "code", "execution_count": 12, "id": "5ffdfb73-83c5-4511-9fba-076282dfa1b8", "metadata": {}, "outputs": [], "source": [ "class My_LSTM(nn. Module):\n", " def __init__(self, input_size, hidden_size):\n", " super().__init__()\n", " self.hidden_size = hidden_size\n", " self.gates = nn.Linear(input_size + hidden_size, hidden_size * 4)\n", " self.sigmoid = nn.Sigmoid()\n", " self.tanh = nn. 
Tanh()\n", " for param in self.parameters():\n", " if param.dim() > 1:\n", " nn.init.xavier_uniform_(param)\n", "\n", " def forward(self, x):\n", " batch_size = x.size(0)\n", " seq_len = x.size(1)\n", " h, c = (torch.zeros(batch_size, self.hidden_size).to(x.device) for _ in range(2))\n", " y_list = []\n", " for i in range(seq_len):\n", " forget_gate, input_gate, output_gate, candidate_cell = \\\n", " self.gates(torch.cat([x[:, i, :], h], dim=-1)).chunk(4, -1)\n", " forget_gate, input_gate, output_gate = (\n", " self.sigmoid(g) for g in (forget_gate, input_gate, output_gate)\n", " )\n", " c = forget_gate * c + input_gate * self.tanh(candidate_cell)\n", " h = output_gate * self.tanh(c)\n", " y_list.append(h)\n", " return torch.stack(y_list, dim=1), (h, c)" ] }, { "cell_type": "markdown", "id": "03bf9cd0-d40b-4f5d-a61e-7a71a925d808", "metadata": {}, "source": [ "训练。" ] }, { "cell_type": "code", "execution_count": 13, "id": "d3943744-72df-48db-a035-c3e76ba68127", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b53f30fc7da24cd9913297e01db708b4", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4000 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "class Model_3(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super(Model_3, self).__init__()\n", " self.rnn = My_LSTM(input_size=input_size, hidden_size=hidden_size)\n", " self.relu = nn.LeakyReLU()\n", " self.fc = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, x):\n", " x, _ = self.rnn(x)\n", " out = self.fc(self.relu(x[:, -1, :]))\n", " return out\n", "\n", "\n", "training_args = {\n", " 'train_dataset': train_dataset,\n", " 'eval_dataset': valid_dataset,\n", " 'test_dataset': test_dataset,\n", " 'learning_rate': 5.0e-5,\n", " 'num_epochs': 100,\n", " 'batch_size': 256,\n", " 'weight_decay': 0.0,\n", " 'logging_steps': 3,\n", " 'eval_steps': 500,\n", " 'print_log_epochs': 
0\n", "}\n", "\n", "model = Model_3(input_size=3, hidden_size=512, output_size=1).to(device)\n", "trainer = Trainer(model=model, **training_args)\n", "_ = trainer.train()" ] }, { "cell_type": "markdown", "id": "fdd61a05-cb1a-4f6e-9d18-c7b6bad6d993", "metadata": {}, "source": [ "模型能正常收敛,且最终测试效果比普通RNN要好。" ] }, { "cell_type": "markdown", "id": "198254ba-6c23-483d-9d36-abd31dcffa63", "metadata": {}, "source": [ "## 4.2. 实现GRU" ] }, { "cell_type": "code", "execution_count": 14, "id": "5a110b9a-7e70-44ee-8abf-cb204446b673", "metadata": {}, "outputs": [], "source": [ "torch.cuda.empty_cache()" ] }, { "cell_type": "code", "execution_count": 15, "id": "6855317e-b481-473b-bd63-a0318ac8668b", "metadata": {}, "outputs": [], "source": [ "class My_GRU(nn.Module):\n", " def __init__(self, input_size, hidden_size):\n", " super().__init__()\n", " self.hidden_size = hidden_size\n", " \n", " self.gates = nn.Linear(input_size + hidden_size, hidden_size * 2)\n", " self.hidden_transform = nn.Linear(input_size + hidden_size, hidden_size)\n", " \n", " self.sigmoid = nn.Sigmoid()\n", " self.tanh = nn.Tanh()\n", " \n", " for param in self.parameters():\n", " if param.dim() > 1:\n", " nn.init.xavier_uniform_(param)\n", " \n", " def forward(self, x):\n", " batch_size = x.size(0)\n", " seq_len = x.size(1)\n", " \n", " h = torch.zeros(batch_size, self.hidden_size).to(x.device)\n", " y_list = []\n", " for i in range(seq_len):\n", " update_gate, reset_gate = self.gates(torch.cat([x[:, i, :], h], dim=-1)).chunk(2, -1)\n", " update_gate, reset_gate = (self.sigmoid(gate) for gate in (update_gate, reset_gate))\n", " candidate_hidden = self.tanh(self.hidden_transform(torch.cat([x[:, i, :], reset_gate * h], dim=-1)))\n", " h = (1-update_gate) * h + update_gate * candidate_hidden\n", " y_list.append(h)\n", " return torch.stack(y_list, dim=1), h" ] }, { "cell_type": "code", "execution_count": 16, "id": "cb5997bf-cac5-4677-af2f-0d2adc3cef90", "metadata": {}, "outputs": [ { "data": { 
"application/vnd.jupyter.widget-view+json": { "model_id": "1e5ebca3bc61477ab0a29e8142bfcb6d", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4000 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "class Model_4(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super(Model_4, self).__init__()\n", " self.rnn = My_GRU(input_size=input_size, hidden_size=hidden_size)\n", " self.relu = nn.LeakyReLU()\n", " self.fc = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, x):\n", " x, _ = self.rnn(x)\n", " out = self.fc(self.relu(x[:, -1, :]))\n", " return out\n", "\n", "\n", "training_args = {\n", " 'train_dataset': train_dataset,\n", " 'eval_dataset': valid_dataset,\n", " 'test_dataset': test_dataset,\n", " 'learning_rate': 2.0e-5,\n", " 'num_epochs': 100,\n", " 'batch_size': 256,\n", " 'weight_decay': 0.0,\n", " 'logging_steps': 3,\n", " 'eval_steps': 500,\n", " 'print_log_epochs': 0\n", "}\n", "\n", "model = Model_4(input_size=3, hidden_size=512, output_size=1).to(device)\n", "trainer = Trainer(model=model, **training_args)\n", "_ = trainer.train()" ] }, { "cell_type": "markdown", "id": "8f44067d-73a7-4064-bf29-559519542ecc", "metadata": {}, "source": [ "模型正常收敛,且测试集表现比LSTM更好。" ] }, { "cell_type": "markdown", "id": "8626b0fd-86b3-41e8-9761-5e56c19e15c1", "metadata": {}, "source": [ "# 5.\n", "\n", "**设计实验,对比分析LSTM和GRU在相同数据集上的结果。**" ] }, { "cell_type": "code", "execution_count": 17, "id": "4d4d02b3-ddd0-4e5f-9d91-4ba95a0f4c76", "metadata": {}, "outputs": [], "source": [ "torch.cuda.empty_cache()" ] }, { "cell_type": "code", "execution_count": 18, "id": "d7126105-1eb9-45bb-b58a-aa0f36e4a878", "metadata": {}, "outputs": [], "source": [ "class Model_5(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super(Model_5, self).__init__()\n", " self.rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)\n", " self.relu = 
nn.LeakyReLU()\n", " self.fc = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, x):\n", " x, _ = self.rnn(x)\n", " out = self.fc(self.relu(x[:, -1, :]))\n", " return out\n", "\n", "class Model_6(nn.Module):\n", " def __init__(self, input_size, hidden_size, output_size):\n", " super(Model_6, self).__init__()\n", " self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)\n", " self.relu = nn.LeakyReLU()\n", " self.fc = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, x):\n", " x, _ = self.rnn(x)\n", " out = self.fc(self.relu(x[:, -1, :]))\n", " return out" ] }, { "cell_type": "code", "execution_count": 19, "id": "e1197d56-730a-4a74-a853-9d43f050a55a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "模型1(模型架构=LSTM)开始训练:\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a01515796809407b8568a3eb5a534bd8", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/4000 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model_archs = [\"LSTM\", \"GRU\"]\n", "plot_colors = ['green', 'orange']\n", "\n", "fig, axes = plt.subplots(1, 2, figsize=(7, 3.5))\n", "\n", "axes[0].set_xlabel('Step')\n", "axes[0].set_ylabel('Loss')\n", "axes[0].set_title('Validation Loss Curve')\n", "axes[0].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", "axes[1].set_xlabel('Step')\n", "axes[1].set_ylabel('Error')\n", "axes[1].set_title('Validation Error Curve')\n", "axes[1].grid(True, linestyle='--', linewidth=0.5, alpha=0.6)\n", "\n", "training_args = {\n", " 'train_dataset': train_dataset,\n", " 'eval_dataset': valid_dataset,\n", " 'test_dataset': test_dataset,\n", " 'learning_rate': 1.0e-5,\n", " 'num_epochs': 100,\n", " 'batch_size': 256,\n", " 'weight_decay': 0.0,\n", " 'logging_steps': 3,\n", " 'eval_steps': 500,\n", " 'plot': False,\n", " 'print_log_epochs': 0,\n", " 'print_eval': False\n", "}\n", "\n", "for 
index, model_arch in enumerate(model_archs):\n", " model = (\n", " Model_5(input_size=3, hidden_size=512, output_size=1)\n", " if model_arch == \"LSTM\" else\n", " Model_6(input_size=3, hidden_size=512, output_size=1)\n", " ).to(device)\n", " \n", " print(f\"模型{index + 1}(模型架构={model_arch})开始训练:\")\n", " trainer = Trainer(model=model, **training_args)\n", " curves = trainer.train()['curves']\n", "\n", " eval_log_steps, eval_losses = zip(*curves['eval_loss_curve'])\n", " axes[0].plot(\n", " eval_log_steps, eval_losses,\n", " label=f\"model arch={model_arch}\", color=plot_colors[index]\n", " )\n", " eval_log_steps, eval_errors = zip(*curves['eval_error_curve'])\n", " axes[1].plot(\n", " eval_log_steps, eval_errors, \n", " label=f\"model arch={model_arch}\", color=plot_colors[index]\n", " )\n", "\n", "axes[0].legend()\n", "axes[1].legend()\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "6290b2f4-6346-44d3-b79f-7ed78279425a", "metadata": {}, "source": [ "收敛曲线和测试集实验结果都表明,GRU比LSTM能力更优,且运行速度更快。" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }