diff --git "a/code/2021_autumn/\351\273\204\347\250\21320210282130110/.keep" "b/code/2021_autumn/\351\273\204\347\250\21320210282130110/.keep" new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git "a/code/2021_autumn/\351\273\204\347\250\21320210282130110/2021282130110\351\273\204\347\250\213.ipynb" "b/code/2021_autumn/\351\273\204\347\250\21320210282130110/2021282130110\351\273\204\347\250\213.ipynb" new file mode 100644 index 0000000000000000000000000000000000000000..dc0186ac895ef56d45c9c174f3e4ab69c616d2f1 --- /dev/null +++ "b/code/2021_autumn/\351\273\204\347\250\21320210282130110/2021282130110\351\273\204\347\250\213.ipynb" @@ -0,0 +1,683 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "db5d9571", + "metadata": {}, + "source": [ + "### 导入相应的包,并定义运行环境的参数。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3086b361", + "metadata": {}, + "outputs": [], + "source": [ + "# 定义运行环境\n", + "import os\n", + "import argparse\n", + "import numpy as np\n", + "import pandas as pd\n", + "from mindspore import context\n", + "from mindspore import dataset as ds\n", + "import mindspore.nn as nn\n", + "from mindspore.common.initializer import Normal, initializer\n", + "from mindspore import dtype as mstype\n", + "from mindspore import Tensor\n", + "from mindspore import ops\n", + "from mindspore.nn.loss import MSELoss\n", + "from mindspore import Parameter\n", + "\n", + "parser = argparse.ArgumentParser(description='NN Homework')\n", + "parser.add_argument('--device_target', type=str, default=\"GPU\")\n", + "\n", + "args = parser.parse_args(args=[])\n", + "context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)" + ] + }, + { + "cell_type": "markdown", + "id": "2c9a24d6", + "metadata": {}, + "source": [ + "## 数据预处理" + ] + }, + { + "cell_type": "markdown", + "id": "86e3166a", + "metadata": {}, + "source": [ + "### 导入原始数据" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd1ddaf6", + "metadata": {}, + "outputs": [], + "source": [ + "# 导入所有数据并合并,原始数据是保存在多个.csv文件中的,将其逐一导入并合并。\n", + "\n", + "data = []\n", + "for i in range(1,26):\n", + " filename = r'Hangzhou-mobility-data-set\\record_2019-01-' + str(i).rjust(2,'0') + '.csv'\n", + " data_i = pd.read_csv(filename, encoding='utf-8')\n", + " data.append(data_i)\n", + "data = pd.concat(data, axis=0)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef19a0e5", + "metadata": {}, + "outputs": [], + "source": [ + "# 把我们需要的列提取出来并保存合并的数据\n", + "data = data[['time', 'stationID','status']]\n", + "data.to_csv(r'Hangzhou-mobility-data-set\\record_all.csv', encoding='utf-8', index=None)" + ] + }, + { + "cell_type": "markdown", + "id": "47bbcc92", + "metadata": {}, + "source": [ + "### 根据时间戳提取时间片:每10分钟算一个时间片,总共一个月的数据。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7514948e", + "metadata": {}, + "outputs": [], + "source": [ + "data['time_slice'] = data['time'].apply(lambda r: (int(r.split(' ')[0][-2:]) - 1)*144 + int(r.split(' ')[1].split(':')[0])*6 + int(int(r.split(' ')[1].split(':')[1])/10) + 1)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f20b471f", + "metadata": {}, + "outputs": [], + "source": [ + "# 把我们需要的列提取出来并保存数据\n", + "data = data[['stationID','status', 'time_slice']]\n", + "data.to_csv(r'Hangzhou-mobility-data-set\\record_all.csv', encoding='utf-8', index=None)" + ] + }, + { + "cell_type": "markdown", + "id": "b6f05d15", + 
"metadata": {}, + "source": [ + "### 流量数据分为进出两部分,将每个时间片中每个传感器记录的车流量数映射到一个表格中,行号为时间片,列号为传感器编号。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13299db8", + "metadata": {}, + "outputs": [], + "source": [ + "data_in = data[data['status']==1][['time_slice', 'stationID']]\n", + "data_out = data[data['status']==0][['time_slice', 'stationID']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95065bd1", + "metadata": {}, + "outputs": [], + "source": [ + "train = np.zeros((3588, 81, 2))\n", + "def func(r):\n", + " train[r['time_slice']-13, r['stationID'], r['status']]+=1\n", + "data.apply(func, axis=1)\n", + "pd.DataFrame(train[:,:,0])" + ] + }, + { + "cell_type": "markdown", + "id": "a2f7e46c", + "metadata": {}, + "source": [ + "### 将得到的用于训练的数据保存。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c8e7871", + "metadata": {}, + "outputs": [], + "source": [ + "# 这就是将要用来训练的数据\n", + "np.save(r'Hangzhou-mobility-data-set\\train', train)" + ] + }, + { + "cell_type": "markdown", + "id": "27232b91", + "metadata": {}, + "source": [ + "## 模型训练部分" + ] + }, + { + "cell_type": "markdown", + "id": "a1ee5170", + "metadata": {}, + "source": [ + "### 加载数据,包括两个数据:一个是传感器之间的邻接矩阵,一个是预处理好的历史流量数据。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5980ef2e", + "metadata": {}, + "outputs": [], + "source": [ + "def load_adjacency_matrix(adj_path, dtype=np.float32):\n", + " adj_df = pd.read_csv(adj_path, header=None)\n", + " adj_df = adj_df.iloc[1:,1:].copy()\n", + " adj = np.array(adj_df, dtype=dtype)\n", + " return adj" + ] + }, + { + "cell_type": "markdown", + "id": "572ed927", + "metadata": {}, + "source": [ + "### 将数据进行划分,并将调整数据格式,得到训练用数据集" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "675ad7eb", + "metadata": {}, + "outputs": [], + "source": [ + "def create_dataset(data_path, seq_len, pre_len, time_len=None, split_ratio=0.8, normalize=True, batch_size=64):\n", + " # 定义数据集\n", + "\n", + " # 训练前的数据集划分\n", + " # 加载npy数据\n", + " data = np.load(data_path)\n", + " # 处理54号站点的缺失值 线性插值,用相邻两个站点的平均值填充\n", + " data[:,54] = (data[:, 53] + data[:, 55]) / 2\n", + " if time_len is None:\n", + " time_len = data.shape[0]\n", + " if normalize:\n", + " max_val = np.max(data)\n", + " data = data / max_val\n", + " train_size = int(time_len * split_ratio)\n", + " train_data = data[:train_size]\n", + " test_data = data[train_size:time_len]\n", + " train_X, train_Y, test_X, test_Y = list(), list(), list(), list()\n", + " for i in range(len(train_data) - seq_len - pre_len):\n", + " train_X.append(np.array(train_data[i:i + seq_len]))\n", + " train_Y.append(np.array(train_data[i + seq_len:i + seq_len + pre_len]))\n", + " for i in range(len(test_data) - seq_len - pre_len):\n", + " test_X.append(np.array(test_data[i:i + seq_len]))\n", + " test_Y.append(np.array(test_data[i + seq_len:i + seq_len + pre_len]))\n", + " \n", + " data_Train = (np.array(train_X), np.array(train_Y))\n", + " data_Test = (np.array(test_X), np.array(test_Y))\n", + " ds_train = ds.NumpySlicesDataset(data_Train).batch(batch_size=batch_size)\n", + " ds_test = ds.NumpySlicesDataset(data_Test).batch(batch_size=len(test_X))\n", + " return ds_train, ds_test\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "2e671a19", + "metadata": {}, + "source": [ + "### 用12步的数据预测后一步数据" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac490611", + "metadata": {}, + "outputs": [], + "source": [ + "ds_train, ds_test = create_dataset('train.npy', 12, 
1)" + ] + }, + { + "cell_type": "markdown", + "id": "2dd695c9", + "metadata": {}, + "source": [ + "### 拉普拉斯矩阵的正则化和构建切比雪夫多项式" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44eb1438", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from scipy.sparse.linalg import eigs\n", + "\n", + "def scaled_Laplacian(W):\n", + " '''\n", + " compute \\tilde{L}\n", + "\n", + " Parameters\n", + " ----------\n", + " W: np.ndarray, shape is (N, N), N is the num of vertices\n", + "\n", + " Returns\n", + " ----------\n", + " scaled_Laplacian: np.ndarray, shape (N, N)\n", + "\n", + " '''\n", + "\n", + " assert W.shape[0] == W.shape[1]\n", + "\n", + " D = np.diag(np.sum(W, axis=1))\n", + "\n", + " L = D - W\n", + "\n", + " lambda_max = eigs(L, k=1, which='LR')[0].real\n", + "\n", + " return (2 * L) / lambda_max - np.identity(W.shape[0])\n", + "\n", + "def cheb_polynomial(L_tilde, K):\n", + " '''\n", + " compute a list of chebyshev polynomials from T_0 to T_{K-1}\n", + "\n", + " Parameters\n", + " ----------\n", + " L_tilde: scaled Laplacian, np.ndarray, shape (N, N)\n", + "\n", + " K: the maximum order of chebyshev polynomials\n", + "\n", + " Returns\n", + " ----------\n", + " cheb_polynomials: list(np.ndarray), length: K, from T_0 to T_{K-1}\n", + "\n", + " '''\n", + "\n", + " N = L_tilde.shape[0]\n", + "\n", + " cheb_polynomials = [np.identity(N), L_tilde.copy()]\n", + "\n", + " for i in range(2, K):\n", + " cheb_polynomials.append(2 * L_tilde * cheb_polynomials[i - 1] - cheb_polynomials[i - 2])\n", + "\n", + " return cheb_polynomials" + ] + }, + { + "cell_type": "markdown", + "id": "ed6a379e", + "metadata": {}, + "source": [ + "### 根据邻接矩阵构建切比雪夫多项式" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1db9ff4", + "metadata": {}, + "outputs": [], + "source": [ + "K = 3\n", + "adj = load_adjacency_matrix(r'Metro_roadMap.csv')\n", + "L_tilde = scaled_Laplacian(adj)\n", + "cheb_polynomials = [Tensor.from_numpy(i) for i in cheb_polynomial(L_tilde, K)]" + ] + }, + { + "cell_type": "markdown", + "id": "7bda805b", + "metadata": {}, + "source": [ + "### 构建网络模型,包括两次切比雪夫图卷积和GRU。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35a10d84", + "metadata": {}, + "outputs": [], + "source": [ + "class cheb_conv(nn.Cell):\n", + " \"\"\"\n", + " 网络结构\n", + " \"\"\"\n", + " def __init__(self, K, cheb_polynomials, num_gru_units: int, output_dim: int):\n", + " super(cheb_conv, self).__init__()\n", + " \n", + " self.K = K\n", + " self.cheb_polynomials = cheb_polynomials\n", + " self._num_gru_units = num_gru_units\n", + " self._output_dim = output_dim\n", + " self.Theta = Parameter(initializer('Normal', shape=[K, self._num_gru_units + 2, self._output_dim], dtype=mstype.float32))\n", + "\n", + " def construct(self, inputs, hidden_state):\n", + " batch_size, num_nodes, f_in = inputs.shape\n", + " # hidden_state (batch_size, num_nodes, num_gru_units)\n", + " hidden_state = hidden_state.reshape((batch_size, num_nodes, self._num_gru_units))\n", + " # [x, h] (batch_size, num_nodes, num_gru_units + 2)\n", + " concat = ops.Concat(2)\n", + " concatenation = concat((inputs, hidden_state))\n", + " \n", + " output = Tensor.zeros(batch_size, num_nodes, self._output_dim) # (b, N, F_out)\n", + "\n", + " for k in range(self.K):\n", + "\n", + " T_k = self.cheb_polynomials[k] # (N,N)\n", + "\n", + " theta_k = self.Theta[k] # (num_gru_units+2, output_dim)\n", + " \n", + " matmul = ops.MatMul()\n", + " rhs = concatenation.reshape(0, 2, 1)\n", + " rhs = 
+  {
+   "cell_type": "markdown",
+   "id": "7bda805b",
+   "metadata": {},
+   "source": [
+    "### Build the network: a GRU whose two gates are computed by Chebyshev graph convolutions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35a10d84",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class cheb_conv(nn.Cell):\n",
+    "    \"\"\"\n",
+    "    K-order Chebyshev graph convolution over the concatenation [x, h].\n",
+    "    \"\"\"\n",
+    "    def __init__(self, K, cheb_polynomials, num_gru_units: int, output_dim: int):\n",
+    "        super(cheb_conv, self).__init__()\n",
+    "\n",
+    "        self.K = K\n",
+    "        self.cheb_polynomials = cheb_polynomials\n",
+    "        self._num_gru_units = num_gru_units\n",
+    "        self._output_dim = output_dim\n",
+    "        self.Theta = Parameter(initializer('Normal', shape=[K, self._num_gru_units + 2, self._output_dim], dtype=mstype.float32))\n",
+    "        self.concat = ops.Concat(2)\n",
+    "        self.matmul = ops.MatMul()\n",
+    "        self.zeros = ops.Zeros()\n",
+    "\n",
+    "    def construct(self, inputs, hidden_state):\n",
+    "        batch_size, num_nodes, f_in = inputs.shape\n",
+    "        # hidden_state (batch_size, num_nodes, num_gru_units)\n",
+    "        hidden_state = hidden_state.reshape((batch_size, num_nodes, self._num_gru_units))\n",
+    "        # [x, h] (batch_size, num_nodes, num_gru_units + 2)\n",
+    "        concatenation = self.concat((inputs, hidden_state))\n",
+    "\n",
+    "        output = self.zeros((batch_size, num_nodes, self._output_dim), mstype.float32)  # (b, N, F_out)\n",
+    "\n",
+    "        for k in range(self.K):\n",
+    "            T_k = self.cheb_polynomials[k]  # (N, N)\n",
+    "            theta_k = self.Theta[k]  # (num_gru_units + 2, output_dim)\n",
+    "            # T_k @ [x, h]: MatMul is 2-D only, so fold the batch axis into the\n",
+    "            # feature axis before multiplying by T_k.\n",
+    "            rhs = concatenation.transpose(1, 0, 2).reshape((num_nodes, -1))  # (N, b*(h+2))\n",
+    "            rhs = self.matmul(T_k, rhs)  # (N, b*(h+2))\n",
+    "            rhs = rhs.reshape((num_nodes, batch_size, -1)).transpose(1, 0, 2)  # (b, N, h+2)\n",
+    "            rhs = rhs.reshape((batch_size * num_nodes, self._num_gru_units + 2))\n",
+    "            output = output + self.matmul(rhs, theta_k).reshape((batch_size, num_nodes, self._output_dim))\n",
+    "\n",
+    "        # A[x, h]W + b (batch_size, num_nodes * output_dim)\n",
+    "        output = output.reshape((batch_size, num_nodes * self._output_dim))\n",
+    "        return output\n",
+    "\n",
+    "class Cheb_GRU_Cell(nn.Cell):\n",
+    "    def __init__(self, input_dim: int, hidden_dim: int, K: int, cheb_polynomials):\n",
+    "        super(Cheb_GRU_Cell, self).__init__()\n",
+    "        self._input_dim = input_dim\n",
+    "        self._hidden_dim = hidden_dim\n",
+    "        self.cheb_conv_1 = cheb_conv(K, cheb_polynomials, self._hidden_dim, self._hidden_dim * 2)\n",
+    "        self.cheb_conv_2 = cheb_conv(K, cheb_polynomials, self._hidden_dim, self._hidden_dim)\n",
+    "        self.sigmoid = nn.Sigmoid()\n",
+    "        self.tanh = nn.Tanh()\n",
+    "        self.split = ops.Split(1, 2)\n",
+    "\n",
+    "    def construct(self, inputs, hidden_state):\n",
+    "        # [r, u] = sigmoid(A[x, h]W + b)\n",
+    "        # [r, u] (batch_size, num_nodes * (2 * num_gru_units))\n",
+    "        concatenation = self.sigmoid(self.cheb_conv_1(inputs, hidden_state))\n",
+    "        # r, u (batch_size, num_nodes * num_gru_units)\n",
+    "        r, u = self.split(concatenation)\n",
+    "        # c = tanh(A[x, r * h]W + b)\n",
+    "        # c (batch_size, num_nodes * num_gru_units)\n",
+    "        c = self.tanh(self.cheb_conv_2(inputs, r * hidden_state))\n",
+    "        # h := u * h + (1 - u) * c\n",
+    "        # h (batch_size, num_nodes * num_gru_units)\n",
+    "        new_hidden_state = u * hidden_state + (1.0 - u) * c\n",
+    "        return new_hidden_state, new_hidden_state\n",
+    "\n",
+    "class Cheb_GRU(nn.Cell):\n",
+    "    def __init__(self, hidden_dim: int, f_out: int, K: int, cheb_polynomials, **kwargs):\n",
+    "        super(Cheb_GRU, self).__init__()\n",
+    "        self._input_dim = adj.shape[0]  # number of nodes, from the global adjacency matrix\n",
+    "        self._hidden_dim = hidden_dim\n",
+    "        self._f_out = f_out\n",
+    "        self.cheb_gru_cell = Cheb_GRU_Cell(self._input_dim, self._hidden_dim, K, cheb_polynomials)\n",
+    "        self._weights = Parameter(initializer('Normal', shape=[self._hidden_dim, self._f_out], dtype=mstype.float32))\n",
+    "        self._biases = Parameter(initializer('Normal', shape=[self._f_out], dtype=mstype.float32))\n",
+    "        self.matmul = ops.MatMul()\n",
+    "        self.zeros = ops.Zeros()\n",
+    "\n",
+    "    def construct(self, inputs):\n",
+    "        # inputs (batch_size, seq_len, num_nodes, f_in); num_nodes must equal self._input_dim\n",
+    "        batch_size, seq_len, num_nodes, f_in = inputs.shape\n",
+    "        hidden_state = self.zeros((batch_size, num_nodes * self._hidden_dim), mstype.float32)\n",
+    "        output = hidden_state\n",
+    "        for i in range(seq_len):\n",
+    "            output, hidden_state = self.cheb_gru_cell(inputs[:, i, :, :], hidden_state)\n",
+    "        output = output.reshape((-1, self._hidden_dim))\n",
+    "        pred = self.matmul(output, self._weights) + self._biases\n",
+    "        # align with the labels' (batch_size, pre_len=1, num_nodes, f_out) layout\n",
+    "        pred = pred.reshape((-1, 1, num_nodes, self._f_out))\n",
+    "        return pred"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "89d16bbf",
+   "metadata": {},
+   "source": [
+    "### Instantiate the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ec899a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# hidden dim 100, predict both directions (f_out=2), Chebyshev order K=3\n",
+    "net = Cheb_GRU(100, 2, 3, cheb_polynomials)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5981f9e7",
+   "metadata": {},
+   "source": [
+    "### Define the loss function: MSE with L2 regularization."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23b6d040",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class MSE_L2Loss(MSELoss):\n",
+    "    def __init__(self, reduction=\"mean\"):\n",
+    "        super(MSE_L2Loss, self).__init__(reduction)\n",
+    "\n",
+    "    def construct(self, inputs, targets, model, lamda=1.5e-3):\n",
+    "        # L2 penalty over all trainable parameters\n",
+    "        reg_loss = 0.\n",
+    "        for param in model.trainable_params():\n",
+    "            reg_loss += (param ** 2).sum() / 2\n",
+    "        reg_loss = lamda * reg_loss\n",
+    "        mse_loss = ((inputs - targets) ** 2).sum() / 2\n",
+    "        return self.get_loss(mse_loss + reg_loss)"
+   ]
+  },
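+  {
+   "cell_type": "markdown",
+   "id": "a7e52f18",
+   "metadata": {},
+   "source": [
+    "Before wiring up the optimizer, a one-batch smoke test on random (hypothetical) data confirms that the forward pass compiles and that the output shape matches the labels produced by `create_dataset`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d2b8e634",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical batch: 2 samples x 12 steps x 81 stations x 2 directions.\n",
+    "x_smoke = Tensor(np.random.rand(2, 12, 81, 2).astype(np.float32))\n",
+    "print(net(x_smoke).shape)  # expect (2, 1, 81, 2)"
+   ]
+  },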
+  {
+   "cell_type": "markdown",
+   "id": "21cea5fc",
+   "metadata": {},
+   "source": [
+    "### Use the framework's built-in Adam optimizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14c6a451",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Model's loss_fn only receives (logits, labels), so the plain MSELoss is used here;\n",
+    "# an L2 penalty could instead be applied through the optimizer's weight_decay argument.\n",
+    "optimizer = nn.Adam(net.trainable_params(), learning_rate=0.01, beta1=0.9, beta2=0.999, eps=1e-8, use_locking=False, use_nesterov=False, weight_decay=0.0, loss_scale=1.0)\n",
+    "loss = MSELoss()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69fbf789",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mindspore.train.callback import ModelCheckpoint, CheckpointConfig\n",
+    "# Configure checkpoint saving\n",
+    "config_ck = CheckpointConfig(save_checkpoint_steps=100, keep_checkpoint_max=10)\n",
+    "# Apply the checkpoint configuration\n",
+    "ckpoint = ModelCheckpoint(prefix=\"checkpoint_cheb_gru\", config=config_ck)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "657c7ab1",
+   "metadata": {},
+   "source": [
+    "### Define the training and evaluation functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db0ae71e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mindspore.train.callback import LossMonitor\n",
+    "from mindspore import Model\n",
+    "def train_net(model, epoch_size, ckpoint_cb):\n",
+    "    \"\"\"Train the model.\"\"\"\n",
+    "    model.train(epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor()])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e8994dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def test_net(model):\n",
+    "    \"\"\"Evaluate the model on the test set.\"\"\"\n",
+    "    acc = model.eval(ds_test)\n",
+    "    print(\"{}\".format(acc))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1867e383",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mindspore.nn import MSE, MAE\n",
+    "model = Model(net, loss, optimizer, metrics={\"MSE\": MSE(), \"MAE\": MAE()})\n",
+    "train_net(model, 1000, ckpoint)\n",
+    "test_net(model)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "86cf4555",
+   "metadata": {},
+   "source": [
+    "## Demonstrate the model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a9d2ef53",
+   "metadata": {},
+   "source": [
+    "### Predict with the trained model and plot the results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87b2fdd7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "%matplotlib inline\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly\n",
+    "sns.set(style='dark')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1f1cd6ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def draw(ds_test, net):\n",
+    "    # take the single full-size batch from the test dataset\n",
+    "    X, y = next(ds_test.create_tuple_iterator())\n",
+    "    pred = net(X)\n",
+    "\n",
+    "    # rescale for display (recovering exact counts would need the global\n",
+    "    # max used for normalization in create_dataset)\n",
+    "    max_data = X.asnumpy().max()\n",
+    "    y = y.asnumpy() * max_data\n",
+    "    pred = pred.asnumpy() * max_data\n",
+    "    figure, axes = plt.subplots(2, 2, figsize=(18,12), dpi=250)\n",
+    "    stations = [20, 30, 35, 80]\n",
+    "    for i in range(2):\n",
+    "        for j in range(2):\n",
+    "            # plot the inbound flow (channel 0) of each selected station\n",
+    "            y_i = y[:, 0, stations[i*2+j], 0]\n",
+    "            pred_i = pred[:, 0, stations[i*2+j], 0]\n",
+    "            data_to_plot = pd.concat([pd.DataFrame(y_i), pd.DataFrame(pred_i)], axis=1)\n",
+    "            data_to_plot.columns = ['Ground truth', 'Prediction']\n",
+    "            ax = axes[i][j]\n",
+    "            ax.set_xlabel(\"Time slice\")\n",
+    "            ax.set_ylabel(\"Passenger count\")\n",
+    "            ax.set_title(\"station_{}\".format(stations[i*2+j]))\n",
+    "            data_to_plot[144:288].plot(ax=ax)"
+   ]
+  },
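+  {
+   "cell_type": "markdown",
+   "id": "e1f0a9b3",
+   "metadata": {},
+   "source": [
+    "If the kernel was restarted after training, the weights can be restored from a saved checkpoint before plotting. This is a sketch: the filename below is hypothetical (ModelCheckpoint names files as `<prefix>-<epoch>_<step>.ckpt`, so the actual name depends on the run)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f4c3d2e1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mindspore.train.serialization import load_checkpoint, load_param_into_net\n",
+    "# hypothetical checkpoint name - replace with an actual file from this run\n",
+    "param_dict = load_checkpoint('checkpoint_cheb_gru-1000_45.ckpt')\n",
+    "load_param_into_net(net, param_dict)"
+   ]
+  },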
ax.set_title(\"station_{}\".format(id[i*2+j]))\n", + " #print(datas[(i*2+j)*4:(i*2+j+1)*4][0].shape)\n", + " data_to_plot[144:288].plot(ax=ax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7694a98e", + "metadata": {}, + "outputs": [], + "source": [ + "draw(ds_test, net)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "9d8c2e1b118569977d31fd3bdc4d41a7ae6ddd7e3f9ab808fd9831b3bbc785fd" + }, + "kernelspec": { + "display_name": "MindSpore", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}