---
title: "Specifying Time-Varying Parameters"
output: 
  rmarkdown::html_vignette:
    toc: true
vignette: >
  %\VignetteIndexEntry{Specifying Time-Varying Parameters}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: console
bibliography: macpan2.bib
link-citations: TRUE
---

[![status](https://img.shields.io/badge/status-working%20draft-red)](https://canmod.github.io/macpan2/articles/vignette-status#working-draft)

```{r pkgs, include = FALSE}
## matrix package version issues
do_random_effects = packageVersion("Matrix") >= "1.6-5"

library(macpan2)
library(macpan2helpers)
library(dplyr)
library(ggplot2)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6,
  fig.height = 4
  )
basefig_setup <- function() par(las = 1, bty = "l")
knitr::knit_hooks$set(basefig = basefig_setup)
```

```{r local_function, include=FALSE}
# to be included in mp_tmb_coef in the future
# see here, https://github.com/canmod/macpan2/issues/179
backtrans <- function(x) {
  vars1 <- intersect(c("default", "estimate", "conf.low", "conf.high"), names(x))
  prefix <- stringr::str_extract(x[["mat"]], "^log(it)?_")  |> tidyr::replace_na("none")
  sx <- split(x, prefix)
  for (ptype in setdiff(names(sx), "none")) {
    link <- make.link(stringr::str_remove(ptype, "_"))
    sx[[ptype]] <- (sx[[ptype]]
                    |> mutate(across(std.error, ~link$mu.eta(estimate)*.))
                    |> mutate(across(any_of(vars1), link$linkinv))
                    |> mutate(across(mat, ~stringr::str_remove(., paste0("^", ptype))))
    )
  }
  bind_rows(sx)
}

```


## Baseline SIR Model

Here we modify an [SIR](https://github.com/canmod/macpan2/tree/main/inst/starter_models/sir) model so that transmission rate is time-varying.

We initialize a vector of state labels and parameter default values for convenience and specify a simulation time of 50 time steps. 
```{r intializations}
state_labels = c("S", "I", "R")
time_steps = 50
beta = 0.8 # per-capita transmission rate
gamma = 0.2 # per-capita recovery rate
```

Using a fixed transmission rate of `r beta` we visualize the baseline SIR dynamics.
```{r baseline_sir, echo = FALSE}
# baseline SIR dynamics
("starter_models"
  |> mp_tmb_library("sir", package = "macpan2")
  |> mp_simulator(time_steps = time_steps
    , outputs = state_labels
    , default = list(beta = beta, gamma = gamma)
  )
  |> mp_trajectory()
  |> mutate(state = factor(matrix, state_labels))
  |> ggplot() 
  + geom_line(aes(time, value, colour = state))
  + theme_bw()
)
```

## Piecewise Time Variation

To create a piecewise time-varying transmission rate, we need to specify two variables:

1. `beta_changepoints` - An integer vector containing the starting time steps at which the transmission rate `beta` changes. This vector starts with a time step of 0 because we want to specify an initial default value of `beta` followed by changing transmission rates at the beginning of time-step 10 and 15. (**Note: Currently `macpan2::time_var` expects the argument `change_points` to start at 0, so the default value of `beta` in this example is not used. In future development, `time_var` will accept `change_points[1] > 0` and will use the default value of the parameter for time steps before `change_points[1]`. See [Update time_var to incorporate default value](https://github.com/canmod/macpan2/issues/196)**)
2. `beta_values` A numeric vector containing the values `beta` takes at each time step specified in `beta_changepoints`.

```{r piecewise_defaults}
beta_changepoints = c(0, 10, 15)
beta_values = c(0.8, 0.01, 0.4)
```

We then need to specify an expression that updates `beta` to `beta_values` at the time steps in `beta_changepoints`. We use the [`time_var`](https://canmod.github.io/macpan2/reference/engine_functions.html#time-indexing) function that will return a value for `beta`, from `beta_values`, by comparing the current time step with the time steps in `beta_changepoints`.
```{r time_var}
# see ?time_var for arguments
expr = list(
 beta ~ time_var(beta_values, beta_changepoints)
)

```

Let's test that `time_var` is computing what we want for 20 time steps.
```{r sim_time_var}
simple_sims(
    iteration_exprs = expr
  , time_steps = 20
  
  # for integer vectors (usually indexing vectors) use `int_vecs`
  , int_vecs = list(beta_changepoints = beta_changepoints)
  
  # for numeric vectors (model defaults) use `mats`
  # we need to initialize beta because it is a variable in our model
  # so we set to 0.8 - at the beginning of the simulation loop (time_step==1)
  # beta gets updated so the initial value of beta has no effect in this case
  , mats = list(
      beta = beta
    , beta_values = beta_values)

) |> filter(matrix == "beta")
```

Now that we know our expression is updating `beta` correctly, we can modify the SIR model to include this piece-wise transmission rate.
```{r piecewise_simulator}
# model specification with piece-wise transmission rates
piecewise_spec = (
  "starter_models"
  # read in model from library                    
  |> mp_tmb_library("sir", package = "macpan2")
  # insert expression for updating beta at the beginning of the simulation loop
  |> mp_tmb_insert(
      phase="during"
    , at=1
    , expressions = expr
    , default = list(
        # Note: the default value of beta here has no effect, because at the
        # first time step beta is updated to beta_values[1]
        beta = beta
      , gamma = gamma
      , beta_values = beta_values
      )
    , integers = list(beta_changepoints = beta_changepoints))
)
# check that model spec was updated accordingly
print(piecewise_spec)

# create simulator object
piecewise_simulator = (piecewise_spec
   |> mp_simulator(time_steps = time_steps, outputs=c(state_labels))

)

```

Now we plot the updated simulations using these change-points, which we highlight with vertical lines.
```{r time_varying_graph, echo=FALSE}
# simulated data from model
sim_data = mp_trajectory(piecewise_simulator)

(sim_data
  %>% mutate(state = factor(matrix, state_labels))
  %>% ggplot()
  + geom_line(aes(time, value, colour = state))
  + geom_point(aes(time, value, colour = state))
  + geom_vline(
    aes(xintercept = x),
    linetype = "dashed",
    alpha = 0.5,
    data = data.frame(x = beta_changepoints)
  )
  + theme_bw()
)
```

The clear changes in dynamics at times 10 and 15 are due to the drop and then lift of the transmission rate at these time steps.


## Calibrating Time Variation Parameters

First we simulate data to fit our model to, to see if we can recover the time-varying parameters.
```{r obs_data, basefig = TRUE}
set.seed(1L)
I_observed = (sim_data
              |> filter(matrix=="I")
              # add some poisson noise
              |> mutate(value = rpois(n(),value))
)
plot(I_observed$time, I_observed$value)
```


We often want to include parameter constraints in our models, and one way to do this implicitly is to transform the parameters [@bolker2008]. Further, transformations can sometimes help when estimating parameters by making the "likelihood surface closer to quadratic" and reducing parameter correlation [@bolker2008]. Here we log-transform $\beta$ because our constraint is that $\beta$ must be positive and the domain of the logarithm is $(0, \infty)$.

```{r transformed_spec}

# transformed model specification
transformed_spec = mp_tmb_insert(
    model = piecewise_spec
  , phase = "before"
  , at = 1
  # we need to exponentiate log transformed values so beta is on the appropriate
  # scale when computing model dynamics
  , expressions = list(beta_values ~ exp(log_beta_values))
  # we also need to specify default values for log_beta_values
  # here we set all default values for beta to the mean of the true values
  # i.e. mean(log(beta_values))
  , default = list(
      log_beta_values = rep(mean(log(beta_values)), length(beta_values))
    )
)
# Note the true values of beta (`beta_values`) are still included in the default
# list, however they get updated before the simulation loop, so they are not
# informing our estimates for these values when calibrating.
transformed_spec

```

Next we create the calibrator object and specify that we want to estimate the vector of time varying transmission rates, `log_beta_values`, by passing this parameter name to the `par` argument. 

```{r piecewise_calib}
# set up calibrator object
piecewise_calib = mp_tmb_calibrator(
    spec = transformed_spec
  , data = I_observed
  , traj = "I"
  # we want to estimate the log-transformed parameters
  , par = "log_beta_values"
  , outputs = state_labels
)

# optimization step
mp_optimize(piecewise_calib)
```

After optimizing, we can make a coefficient plot with the estimated values and their confidence intervals, to compare with the true values.

```{r piecewise_est, echo=FALSE, results=FALSE}
# get parameter estimates, and compare with true values
cc <- (mp_tmb_coef(piecewise_calib, conf.int=TRUE)
        # back transform to get beta values on original scale
        |> backtrans()
) |> cbind(true_value = beta_values)

(ggplot(cc,aes(estimate,row,col="estimated value"))
  + geom_point()
  + geom_errorbarh(aes(xmin=conf.low, xmax=conf.high))
  + geom_point(aes(true_value, row,col="true value"))
  + geom_vline(xintercept = 0, lty = 2)
  + ggtitle("Coefficient plot")
  + theme_bw()
  + ylab("beta_values")
)

```

In general, the transmission rate estimates follow the expected pattern changing from high, to low, to moderate. The most precise estimate, for the true value of `r beta_values[1]`, results because the observed prevalence data is most informative about the transmission rate when the infection is initially spreading. It makes sense that we get the most accurate estimate for the final transmission rate because most of the observed data is simulated with the final known transmission rate. We lose accuracy and precision for the middle rate because there is only `r beta_changepoints[3]-beta_changepoints[2]` time steps of observed data informing this parameter.


## Radial Basis Functions for Flexible Time Variation (In-Progress)

This section uses radial basis functions (RBFs) to generate models with a flexible functional form for smooth changes in the transmission rate.

Before we can add the fancy radial basis for the transmission rate, we need a base model. We use an SIR model that has been modified to include waning.

```{r sir_waning}
sir_waning = mp_tmb_library("starter_models"
  , "sir_waning"
  , package = "macpan2"
)
```


The `macpan2::rbf` function can be used to produce a matrix giving the values of each basis function (each column) at each time step (each row). Using this matrix, $X$, and a weights vector, $b$, we can get a flexible output vector, $y$, with a shape that can be modified by changing the weights vector.

$$
y = Xb
$$

The following code illustrates this approach.

```{r, fig.height=8, fig.width=6, basefig = TRUE}
set.seed(1L)
d = 20
n = 2500
X = rbf(n, d)
b = rnorm(d, sd = 0.01)
par(mfrow = c(3, 1)
  , mar = c(0.5, 4, 1, 1) + 0.1
)
matplot(X
  , type = "l", lty = 1, col = 1
  , ylab = "basis functions"
  , axes = FALSE
)
axis(side = 2)
box()
barplot(b
  , xlab = ""
  , ylab = "weights"
)
par(mar = c(5, 4, 1, 1) + 0.1)
plot(X %*% b
  , type = "l"
  , xlab = "time"
  , ylab = "output"
)
```

Here `d` is the dimension of the basis, or number of functions, and `n` is the number of time steps. By multiplying the uniform basis matrix (top panel) by a set of weights (middle panel), we obtain a non-uniform curve (bottom panel). Note how the peaks (troughs) in the output are associated with large positive (negative) weights.

Now we want to transform the output of the (matrix) product of the RBF matrix and the weights vector into a time-series for the transmission rate, $\beta$. Although we could just use the output vector as the $\beta$ time series, it is more convenient to transform it so that the $\beta$ values yield more interesting dynamics in an SIR model. In particular, our model for $\beta_t$ as a function of time, $t$, is

$$
\log(\beta_t) = \log(\gamma_t) + \log(N) - \log(S_t) + x_tb
$$

Here we have the recovery rate, $\gamma_t$, and number of susceptibles, $S_t$, at time, $t$, the total population, $N$, and the $t$th row of $X$, $x_t$. To better understand the rationale for this equation note that if every element of $b$ is set to zero, we have the following condition.

$$
\frac{\beta_t S_t}{N} = \gamma_t
$$

This condition assures that the number of infected individuals remains constant at time, $t$. This means that positive values of $b$ will tend to generate outbreaks and negative values will tend to reduce transmission.

**fixme**: I (BMB) understand why you're setting the model up this way, but it's an odd/non-standard setup - may confuse people who are already familiar with epidemic models (it confused me initially).

Here is a simulation model with a radial basis for exogenous transmission rate dynamics.

```{r sim_rbf}
set.seed(1L)

spec_waning = (sir_waning
             |> mp_tmb_insert(
                 phase = "before"
               , at = Inf
               , expressions = list(eta ~ gamma * exp(X %*% b))
               , default = list(eta = empty_matrix, X = X, b = b)
             ) |> mp_tmb_insert(
                 phase = "during"
               , at = 1
               , expressions = list(beta  ~ eta[time_step(1)] / clamp(S/N, 1/100))
             )
)


simulator_waning = (spec_waning
  |> mp_simulator(
    time_steps = n
  , outputs = c("S", "I", "R", "infection", "beta")
  , default = list(
      N = 100000, I = 500, R = 0
    , beta = 1, gamma = 0.2, phi = 0.01

  ))
)


print(simulator_waning)
```

```{r plot_rbf, fig.height=8, fig.width=6}
(simulator_waning
 |> mp_trajectory()
 |> ggplot()
 + facet_wrap(~ matrix, ncol = 1, scales = 'free')
 + geom_line(aes(time, value))
)
```


### Calibration

We can perform calibration with a time-varying parameter specified with a radial basis, by using the function `macpan2::mp_rbf()`. We follow the ususal steps in calibration.

1\. Simulate from the model and add some poisson noise:

```{r obs_rbf}
obs_rbf = (simulator_waning
 |> mp_trajectory()
 |> filter(matrix=="I")
 |> mutate(across(value, ~ rpois(n(), .)))
)

```


2\. Add calibration information.

```{r calib_rbf}
calib_rbf = mp_tmb_calibrator(sir_waning
   , data = obs_rbf
   , traj = "I"
   ## estimate 
   , par = "beta"
   , tv = mp_rbf("beta", dimension = d, initial_weights = b)
   ## pass all defaults, including dimension of the rbf `d` and initial weights vector `b`
   , default = list(N = 100000, I = 500, R = 0, beta = 1, gamma = 0.2, phi = 0.01, d = d, b = b)

  
)   
mp_optimize(calib_rbf)
## check estimates
mp_tmb_coef(calib_rbf,conf.int = TRUE)
```

3\. Review results.

The fit to the observed data looks reasonable, however there are some obvious wiggly deviations.
```{r rbf_fit, include = FALSE, message = FALSE}
fitted_data = mp_trajectory_sd(calib_rbf, conf.int = TRUE)
```

```{r rbf_fit_plot, echo=FALSE}
## visualize fit
(ggplot(obs_rbf, aes(time,value))
      + geom_point()
      + geom_line(aes(time, value)
      , data = fitted_data
      , colour = "red"
    )
    + geom_ribbon(aes(time, ymin = conf.low, ymax = conf.high)
      , data = fitted_data
      , alpha = 0.2
      , colour = "red"
    )
    + theme_bw()
    + ylab("prevalence")
)

```


## References