Contents

KNN imputation

Contents

KNN imputation

import os
import pandas as pd
import numpy as np

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import KNNImputer

# Pull the master table straight from the ed-forecast GitHub repository.
# index_col=0 makes the first CSV column the row index instead of a data column.
master_url = 'https://raw.githubusercontent.com/CharlotteJames/ed-forecast/main/data/master_scaled_impute.csv'
master = pd.read_csv(master_url, index_col=0)

# Preview the first rows of the loaded table.
master.head()

	ccg	month	111_111_offered	111_111_answered	amb_sys_made	amb_sys_answered	gp_appt_attended	gp_appt_available	ae_attendances_attendances	population	People	Places	Lives	year
0	00N	Jan	568.479362	459.899870	216.450677	148.439545	4338.335607	4738.295678	385.585466	15.0265	94.8	101.7	96.8	2018
1	00N	Feb	472.022555	395.194004	204.869142	147.335865	3704.655109	4074.002595	347.519382	15.0265	94.8	101.7	96.8	2018
2	00N	Mar	541.224032	453.863381	233.092813	168.121234	3907.696403	4321.498686	383.389345	15.0265	94.8	101.7	96.8	2018
3	00N	Apr	506.059962	438.172926	210.990836	149.848422	3721.092736	4089.042691	396.299870	15.0265	94.8	101.7	96.8	2018
4	00N	May	517.326603	452.985246	223.273261	164.595494	3848.800453	4224.337005	422.187469	15.0265	94.8	101.7	96.8	2018

# Show the (rows, columns) dimensions of the loaded table.
master.shape

(1920, 14)

# The first two columns ('ccg', 'month') are identifiers; every column after
# them is passed to the imputer.
n_id_cols = 2
cols = master.columns[n_id_cols:]

KNN

%%time

# Impute the missing values in the feature columns with a k-nearest-neighbours
# imputer for every neighbourhood size N = 3..11, writing one CSV per N.
output_dir = '../data/imputed'
# Create the target folder up front: to_csv does not create missing
# directories and would otherwise fail on the first iteration.
os.makedirs(output_dir, exist_ok=True)

for N in range(3, 12):

    knn = KNNImputer(n_neighbors=N)

    # fit_transform returns an array without labels (by default), so the
    # column names are re-attached here. Reusing master's index keeps the
    # 'month' / 'ccg' assignments below aligned row-for-row: pandas aligns
    # column assignment on index labels, so a fresh 0..n-1 index would
    # silently produce NaN/misplaced values if master's index differs.
    master_imputed = pd.DataFrame(
        knn.fit_transform(master[cols]),
        columns=cols,
        index=master.index,
    )

    # Put the identifier columns (excluded from imputation) back, at the end.
    master_imputed['month'] = master['month']
    master_imputed['ccg'] = master['ccg']

    master_imputed.to_csv(f'{output_dir}/master_imputed_{N}.csv')

CPU times: user 2.71 s, sys: 972 ms, total: 3.68 s
Wall time: 829 ms