forked from britishredcrosssociety/covid-19-vulnerability
-
Notifications
You must be signed in to change notification settings - Fork 0
/
init.r
39 lines (35 loc) · 1.74 KB
/
init.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#' Add two numbers, treating NA as zero
#'
#' Every `NA` operand is replaced by zero before adding. The result is
#' therefore zero when both operands are zero or `NA`, and the sum of the
#' non-missing values otherwise. Works element-wise on vectors, following R's
#' usual recycling rules for `+`.
#'
#' @param x First number (or numeric vector)
#' @param y Second number (or numeric vector)
#'
#' @return The sum of `x` and `y`, with every `NA` counted as zero
#'
#' @examples
#' 1 %++% 1 # == 2
#' 1 %++% NA # == 1
#' NA %++% NA # == 0
#' c(1, NA) %++% c(NA, NA) # == c(1, 0)
`%++%` = function(x, y) {
  # Substituting zero for NA up front also covers the "both zero or NA" case
  # (0 + 0 == 0), so no scalar-only `if` is needed and vector or zero-length
  # inputs no longer fail on the condition.
  x[is.na(x)] = 0
  y[is.na(y)] = 0
  x + y
}
#' Calculate risk quantiles
#'
#' Computes class breaks for `risk.col` with `classInt::classIntervals()` and
#' labels each value with the number of the class it falls into.
#'
#' @param risk.col The data to quantise
#' @param quants Number of quantiles (default: 5)
#' @param highest.quantile.is.worst Should the highest values in the data receive the highest quantile number (TRUE; the default), or should the scoring be reversed so that quantile 1 holds the highest values (FALSE)?
#' @param style Method to use for calculating quantiles (passed to classIntervals; default: Fisher). One of "fixed", "sd", "equal", "pretty", "quantile", "kmeans", "hclust", "bclust", "fisher", "jenks" or "dpih"
#' @param samp_prop The proportion of samples to use, if slicing using "fisher" or "jenks" (passed to classIntervals; default: 100%)
#' @return A vector containing the risk quantiles, one per element of `risk.col`
#'
calc_risk_quantiles = function(risk.col, quants = 5, highest.quantile.is.worst = TRUE, style = "fisher", samp_prop = 1) {
  # calculate the quantile breaks
  # NOTE(review): largeN is set to the full length of the data, presumably so
  # classIntervals never treats the input as "large" and sub-samples it -
  # confirm against the classInt documentation
  q_breaks = classInt::classIntervals(risk.col, quants, style = style, samp_prop = samp_prop, largeN = length(risk.col))

  # number each value by the interval it falls into (e.g. from 1 to 5, if using quintiles);
  # include.lowest keeps the smallest break value inside the first interval.
  # Spelled TRUE rather than T, because T is an ordinary variable that can be reassigned.
  q = as.integer(cut(risk.col, breaks = q_breaks$brks, include.lowest = TRUE))

  if (!highest.quantile.is.worst) {
    # get the max. quantile in the dataset (won't always be equal to `quants`, e.g. if there are fewer values than `quants`)
    max_quant = max(q, na.rm = TRUE)
    # reverse the quantile scoring so 1 = highest values
    q = (max_quant + 1) - q
  }

  q # return the quantiles
}