forked from britishredcrosssociety/covid-19-vulnerability
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prep prevalence of underlying conditions - Local Authority.r
56 lines (42 loc) · 2.43 KB
/
prep prevalence of underlying conditions - Local Authority.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
##
## Prevalence of underlying conditions - Local Authorities
## - using Modelled Prevalence Estimates from https://fingertips.phe.org.uk/profile/prevalence
##
library(tidyverse)
library(Hmisc)
source("load lookup tables.r")
# Modelled prevalence estimates; this CSV was manually downloaded from
# Public Health England (Fingertips)
phe <- read_csv("data/indicators-DistrictUApre419.data.csv")

# Lookup table used below to map LAD17CD codes onto LAD19CD codes
lad_17_19 <- read_csv("data/LAD 2017 to LAD 2019 codes.csv")
##
## clinical
##
# Build a wide table of prevalence estimates: one row per area code and one
# column per selected indicator.
clinical_lad <- phe %>%
  filter(
    `Area Type` == "District & UA (pre 4/19)",
    # keep only most relevant indicators to C-19: http://covid19-phenomics.org/
    `Indicator Name` %in% c(
      "Estimated prevalence of CHD (55-79 yrs)",
      "Estimated prevalence of Heart failure (16+)",
      "Estimated prevalence of COPD (all ages)",
      "Estimated prevalence of peripheral arterial disease (PAD) (55-79 yrs)"
    )
  ) %>%
  select(`Indicator Name`, Code = `Area Code`, Value) %>%
  # strip the bracketed qualifiers; the pattern is greedy, so everything from
  # the first " (" to the last ")" is removed, e.g. " (PAD) (55-79 yrs)"
  mutate(`Indicator Name` = str_replace(`Indicator Name`, " \\(.*\\)", "")) %>%
  # the cleaned indicator names become the column names
  pivot_wider(names_from = `Indicator Name`, values_from = Value)
# Some Local Authorities merged since this data was published - attach the new
# (2019) LA codes and take average values of the indicators within each new LA.
clinical_lad <- clinical_lad %>%
  left_join(lad_17_19, by = c("Code" = "LAD17CD"))

# A left join keeps rows whose code is missing from the lookup and gives them
# LAD19CD = NA. Those rows would all be averaged together into one meaningless
# "NA" authority, so report them rather than letting that happen silently.
# The output itself is left unchanged.
if (anyNA(clinical_lad$LAD19CD)) {
  warning(
    "No LAD19CD found for LAD17CD code(s): ",
    paste(unique(clinical_lad$Code[is.na(clinical_lad$LAD19CD)]), collapse = ", "),
    "; these rows are grouped under LAD19CD = NA",
    call. = FALSE
  )
}

clinical_lad <- clinical_lad %>%
  group_by(LAD19CD) %>%
  # mean of every numeric column per 2019 code, ignoring missing values
  summarise_if(is.numeric, mean, na.rm = TRUE)
# Calculate the overall vulnerability score.
# Everything is derived in a single mutate(); later columns may refer to the
# ones defined just above them, and the column order matches the output file.
clinical_lad <- clinical_lad %>%
  mutate(
    # quantile group of each indicator (cut2 with g = 5, stored as integer code)
    CHD_q = as.integer(cut2(`Estimated prevalence of CHD`, g = 5)),
    COPD_q = as.integer(cut2(`Estimated prevalence of COPD`, g = 5)),
    HF_q = as.integer(cut2(`Estimated prevalence of Heart failure`, g = 5)),
    Arterial_q = as.integer(
      cut2(`Estimated prevalence of peripheral arterial disease`, g = 5)
    ),

    # z-scores for the indicators; scale() returns a matrix, hence as.numeric()
    CHD_z = as.numeric(scale(`Estimated prevalence of CHD`)),
    COPD_z = as.numeric(scale(`Estimated prevalence of COPD`)),
    HF_z = as.numeric(scale(`Estimated prevalence of Heart failure`)),
    Arterial_z = as.numeric(
      scale(`Estimated prevalence of peripheral arterial disease`)
    ),

    # calculate clinical vulnerability as the sum of the four z-scores,
    # then split that score into five groups as well
    Clinical_Risk = CHD_z + COPD_z + HF_z + Arterial_z,
    Clinical_Risk_q = as.integer(cut2(Clinical_Risk, g = 5))
  )
# Save the Local Authority table with indicators, quintiles, z-scores and risk
write_csv(clinical_lad, "output/clinical-vulnerability-LA.csv")

# Drop the raw inputs from the workspace; clinical_lad itself is kept
rm(list = c("phe", "lad_17_19"))