{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "11381eef",
   "metadata": {},
   "source": [
    "# Predicting attendances next year "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "91158dcf",
   "metadata": {},
   "source": [
    "## Overview "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "475775e8",
   "metadata": {},
   "source": [
    "This notebook contains the code to train the MGSR on data from 2018 and forecast ED demand for 2019.\n",
    "\n",
    "The performance of the MGSR is assessed using the mean absolute percentage error (MAPE).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "29268eb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Suppress every warning so the rendered notebook output stays uncluttered\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0a12a14",
   "metadata": {},
   "source": [
    "## Import libraries "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f9e92104",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pickle as pkl\n",
    "\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import mean_absolute_percentage_error as mape\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "plt.style.use('ggplot')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a24d85cb",
   "metadata": {},
   "source": [
    "## Import data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "13122672",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the master CSV straight from its raw GitHub URL;\n",
    "# the first CSV column is used as the DataFrame index\n",
    "data_url = 'https://raw.githubusercontent.com/CharlotteJames/ed-forecast/main/data/master_scaled_new_pop.csv'\n",
    "dta = pd.read_csv(data_url, index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e369d537",
   "metadata": {},
   "outputs": [],
   "source": [
    "def flatten_column_name(name):\n",
    "    \"\"\"Join the first and last '/'-separated parts of a name with '_'.\n",
    "\n",
    "    A name that contains no '/' is returned unchanged.\n",
    "    \"\"\"\n",
    "    if '/' not in name:\n",
    "        return name\n",
    "    parts = name.split('/')\n",
    "    return '_'.join([parts[0], parts[-1]])\n",
    "\n",
    "\n",
    "dta.columns = [flatten_column_name(name) for name in dta.columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id":
"48a6f288", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ccg | \n", "month | \n", "111_111_offered | \n", "111_111_answered | \n", "amb_sys_made | \n", "amb_sys_answered | \n", "gp_appt_available | \n", "ae_attendances_attendances | \n", "population | \n", "People | \n", "Places | \n", "Lives | \n", "year | \n", "%>65 | \n", "%<15 | \n", "N>65 | \n", "N<15 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "00Q | \n", "Jan | \n", "406.655830 | \n", "308.945095 | \n", "310.561801 | \n", "234.716187 | \n", "4568.019766 | \n", "1179.855246 | \n", "14.8942 | \n", "97.2 | \n", "99.7 | \n", "94.4 | \n", "2018 | \n", "14.46402 | \n", "21.763505 | \n", "2.1543 | \n", "3.2415 | \n", "
1 | \n", "00Q | \n", "Feb | \n", "349.933603 | \n", "256.872981 | \n", "261.756435 | \n", "205.298797 | \n", "3910.918344 | \n", "1075.452189 | \n", "14.8942 | \n", "97.2 | \n", "99.7 | \n", "94.4 | \n", "2018 | \n", "14.46402 | \n", "21.763505 | \n", "2.1543 | \n", "3.2415 | \n", "
2 | \n", "00Q | \n", "Mar | \n", "413.247659 | \n", "300.690725 | \n", "303.676215 | \n", "234.716187 | \n", "4051.778545 | \n", "1210.874032 | \n", "14.8942 | \n", "97.2 | \n", "99.7 | \n", "94.4 | \n", "2018 | \n", "14.46402 | \n", "21.763505 | \n", "2.1543 | \n", "3.2415 | \n", "
3 | \n", "00Q | \n", "Apr | \n", "349.608595 | \n", "278.140171 | \n", "264.973181 | \n", "203.677924 | \n", "3974.433001 | \n", "1186.166427 | \n", "14.8942 | \n", "97.2 | \n", "99.7 | \n", "94.4 | \n", "2018 | \n", "14.46402 | \n", "21.763505 | \n", "2.1543 | \n", "3.2415 | \n", "
4 | \n", "00Q | \n", "May | \n", "361.100544 | \n", "284.419492 | \n", "294.361403 | \n", "227.926437 | \n", "4232.385761 | \n", "1299.297713 | \n", "14.8942 | \n", "97.2 | \n", "99.7 | \n", "94.4 | \n", "2018 | \n", "14.46402 | \n", "21.763505 | \n", "2.1543 | \n", "3.2415 | \n", "