library(simulist)
library(epiparameter)
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(sf)
Appendix E — Simulate line list
Set the distribution for all parameters.
<- epiparameter::epidist(
contact_distribution disease = "COVID-19",
epi_dist = "contact distribution",
prob_distribution = "pois",
prob_distribution_params = c(mean = 2)
)
<- epiparameter::epidist(
infectious_period disease = "COVID-19",
epi_dist = "infectious period",
prob_distribution = "gamma",
prob_distribution_params = c(shape = 3, scale = 2)
)
<- epiparameter::epidist_db(
onset_to_hosp disease = "COVID-19",
epi_dist = "onset to hospitalisation",
single_epidist = TRUE
)
<- epiparameter::epidist_db(
onset_to_death disease = "COVID-19",
epi_dist = "onset to death",
single_epidist = TRUE
)
We create two overlapping outbreaks.
# First peak
set.seed(1)
<- sim_linelist(
p1 contact_distribution = contact_distribution,
infectious_period = infectious_period,
onset_to_hosp = onset_to_hosp,
onset_to_death = onset_to_death,
outbreak_size = c(1000, 1500),
hosp_risk = 0.2,
hosp_death_risk = 0.2,
anonymise = T,
population_age = c(0, 25),
outbreak_start_date = as.Date("2023-01-01")
)
Warning: Number of cases exceeds maximum outbreak size.
Returning data early with 1513 cases and 2946 total contacts (including cases).
# Second peak
set.seed(17)
<- sim_linelist(
p2 contact_distribution = contact_distribution,
infectious_period = infectious_period,
onset_to_hosp = onset_to_hosp,
onset_to_death = onset_to_death,
outbreak_size = c(1000, 1200),
hosp_risk = 0.2,
hosp_death_risk = 0.2,
anonymise = T,
population_age = c(3, 35),
outbreak_start_date = as.Date("2023-05-01")
)
Warning: Number of cases exceeds maximum outbreak size.
Returning data early with 1249 cases and 2417 total contacts (including cases).
Add the district of residence.
# 22 districts in HCMC
<- c("Cu Chi", "Hoc Mon", "Quan 12", "Go Vap", "Binh Chanh", "Binh Tan", "Tan Phu", "Tan Binh", "Phu Nhuan", "Binh Thanh", "Thu Duc", "Quan 6", "Quan 11", "Quan 10", "Quan 3", "Quan 1", "Quan 5", "Can Gio", "Nha Be", "Quan 4", "Quan 8", "Quan 7")
districts
# Let have the first peak in the west, the second peak in the south
<- round(rnorm(10000, mean = 6, sd = 6))
sampling_space $district <- sample(sampling_space[sampling_space >= 1 & sampling_space <= 22], nrow(p1))
p1$district <- districts[p1$district]
p1
<- round(rnorm(10000, mean = 15, sd = 5))
sampling_space $district <- sample(sampling_space[sampling_space >= 1 & sampling_space <= 22], nrow(p2))
p2$district <- districts[p2$district]
p2
$outbreak <- "1st outbreak"
p1$outbreak <- "2nd outbreak"
p2
<- rbind(p1, p2) df
Let see the spatial distribution of the first and second peak.
Code
<- "data/gadm41_hcmc_district.rds"
map_p <- readRDS(map_p)
mapdt <- mapdt[,c(1,3)]
mapdt colnames(mapdt) <- c("district", "geom")
$district <- stringi::stri_trans_general(mapdt$district, "latin-ascii")
mapdt
<- df |>
df_plot count(outbreak, district) |>
as.data.frame() |>
complete(outbreak, district = unique(mapdt$district)) |>
left_join(mapdt, by = "district") |>
st_as_sf()
ggplot(df_plot) +
geom_sf(aes(fill = n)) +
scale_fill_viridis_c(na.value="white") +
facet_wrap(~ outbreak) +
theme_light() +
theme(
axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()
)
Have a look at the final data.
Code
# Clean the date columns and remove non important columns
<- df |> mutate_if(is.Date, ymd) |> select(-ct_value, -case_type, -outbreak)
df
<- df |>
df_plot count(date_onset)
ggplot(df_plot, aes(x = date_onset, y = n)) +
geom_bar(stat = "identity", width = 1, fill = "cornflowerblue") +
theme_minimal()
head(df)