forked from britishredcrosssociety/covid-19-vulnerability
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prep deprivation - NI.r
105 lines (80 loc) · 4.84 KB
/
prep deprivation - NI.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
##
## Bespoke Index of Multiple Deprivation for Northern Ireland
##
library(tidyverse)
library(httr)
library(readxl)
library(janitor)
library(Hmisc)
source("load lookup tables.r")
lsoa_lad = load_lookup_sa_lgd() %>% select(LSOA11CD, LAD19CD = LAD18CD) %>% distinct()
##
## helper functions
##
# use the exponential transformation function listed here: https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/NIMDM_2010_Report_0.pdf#page=68
exp_transform = function(x) -23 * log(1 - x * (1 -exp(-100/23)))
scale_rank = function(x) (x - 0.5) / nrow(imd_ni_scores) # normalise so rank is between 0 and 1 - see Annex B of https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/NIMDM%202017_Technical%20Report.pdf
###############################################################################
## Create an index of multiple deprivation for income, employment, barriers and environment
##
# download "Scottish Index of Multiple Deprivation 2020: data zones", which conveniently includes domain ranks alongside ward and LA lookups
# source: https://www.gov.scot/publications/scottish-index-of-multiple-deprivation-2020-data-zone-look-up/
GET("https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/NIMDM17_SOAresults.xls",
write_disk(tf <- tempfile(fileext = ".xls")))
imd_ni_scores = read_excel(tf, sheet = "MDM")
unlink(tf); rm(tf)
# create the mutiple deprivation index, using recommended weightings
# uncomment the commented lines to replicate the full IMD, to check this process works (it does)
imd_ni_covid = imd_ni_scores %>%
select(LSOA11CD = SOA2001
, Income = `Income Domain Rank \n(where 1 is most deprived)`, Employment = `Employment Domain Rank (where 1 is most deprived)`
, Barriers = `Access to Services Domain Rank (where 1 is most deprived)`, Environment = `Living Environment Domain Rank (where 1 is most deprived)`
# uncomment this line if you want to recapitulate the full IMD score
, Education = `Education, Skills and Training Domain Rank (where 1 is most deprived)`, Health = `Health Deprivation and Disability Domain Rank (where 1 is most deprived)`, Crime = `Crime and Disorder Domain Rank (where 1 is most deprived)`
) %>%
# normalise the ranks so they're between 0 and 1
mutate_if(is.numeric, list(scaled = scale_rank)) %>%
# "Each domain rank is standardised and transformed to an exponential distribution" (https://www2.gov.scot/Resource/0050/00504766.pdf)
mutate_at(vars(ends_with("_scaled")), exp_transform) %>%
# weights in section 12.2 of https://www.nisra.gov.uk/sites/nisra.gov.uk/files/publications/NIMDM%202017_Technical%20Report.pdf
mutate(IMD_score = (Income_scaled * 25) + (Employment_scaled * 25) + (Barriers_scaled * 10) + (Environment_scaled * 5)
# uncomment if you want to calculate full IMD score
+ (Education_scaled * 15) + (Health_scaled * 15) + (Crime_scaled * 5)
) %>%
# calculate IMD rank
mutate(IMD_rank = rank(IMD_score)) %>% # need to reverse the scoring of R's ranking algorithm to get the same style of ranking as in IMD; add 1 to make it not zero-based
# calculate IMD decile
mutate(IMD_decile = as.integer(cut2(IMD_rank, g = 10)),
IMD_quintile = as.integer(cut2(IMD_rank, g = 5)))
write_csv(imd_ni_scores, "output/covid-deprivation-northern-ireland-LSOA.csv")
##
## DEBUGGING
##
# # how well does our calculated ranking (if using the full set of domains) recapitulate the original ranking? Pretty well: r = 0.9629196
# cor(imd_ni_scores$`Multiple Deprivation Measure Rank \n(where 1 is most deprived)`, imd_ni_covid$IMD_rank)
#
# # manually check individual examples
# imd_ni_covid %>%
# filter(IMD_rank == 500) %>% # change this number
# select(LSOA11CD, IMD_rank) %>%
# left_join(imd_ni_scores %>% select(SOA2001, `Multiple Deprivation Measure Rank \n(where 1 is most deprived)`), by = c("LSOA11CD" = "SOA2001"))
###############################################################################
## Calculate proportion of the most-deprived LSOAs in each ward
## - TO COME: couldn't find a lookup table from Super Output Area to Ward
##
#...
###############################################################################
## Calculate proportion of the most-deprived LSOAs in each Local Authority
##
imd_ni_covid_lad = imd_ni_covid %>%
left_join(lsoa_lad, by = "LSOA11CD") %>%
# label LSOAs by whether they're in top 20% most-deprived then summarise by this label
mutate(IMD_top20 = ifelse(IMD_decile <= 2, "Top20", "Other")) %>%
tabyl(LAD19CD, IMD_top20) %>%
# calculate proportion of most deprived LSOAs
mutate(Prop_top20 = Top20 / (Top20 + Other)) %>%
# split into quintiles
mutate(Deprivation_q = as.integer(cut2(Prop_top20, g = 5))) %>%
select(-Other, -Top20)
write_csv(imd_ni_covid_lad, "output/covid-deprivation-northern-ireland-LA.csv")
rm(imd_ni_scores, lsoa_lad, exp_transform, scale_rank)