mqor coverage - 75.20%

Files
Source

#' Quickly summarise observed (measured) and modelled concentrations for MQO
#' evaluation
#'
#' This function calculates all of the model quality objective indictors (e.g.,
#' [vec_mqi()]) *and* all of the complementary indicators (e.g.,
#' [vec_pi_bias()]) for a given dataset. Further relevant summaries are also
#' calculated; for example, the 90th percentile of `mqi` values, `mqi_90`, is
#' calculated using [mqo_percentile()]. Two or three datasets are returned; an
#' overall `summary` dataset, a `by_site` site-wise summary, and - for long-term
#' data - a `by_type` split fixed/indicative summary.
#'
#' @param data An R `data.frame` containing at least five columns; a numeric
#'   column of observed values, a numeric column of modelled values, a character
#'   or factor column of identifiers that identify the site associated with the
#'   concentrations, a character or factor column of identifiers that identify
#'   the pollutant being measured/modelled, and a character or factor column
#'   containing just `"fixed"` or `"indicative"` to label each site. See
#'   [demo_shortterm] for an example format.
#'
#' @param pollutant The pollutant to calculate model quality objectives for.
#'   Should be a value contained within `data[[dict$pollutant]]`.
#'
#' @param term Either `"short"` or `"long"`, identifying whether `data`
#'   represents Short- or Long-term data. The default, `NULL`, will assume this
#'   from the `data` by checking whether `data[[dict$site]]` contains duplicate
#'   values.
#'
#' @param params_fixed,params_indicative See [mqo_params()] for more
#'   information. Defines fixed and indicative parameters. By default, `{mqor}`
#'   attempts to sample appropriate values from [default_params] using `term`
#'   and `pollutant`.
#'
#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary
#'   to specify the columns in the data `{mqor}` should use.
#'
#' @param table By default this function returns a list of different data
#'   objects. `table` allows a user to select just one. Can be one of
#'   `"summary"`, `"by_type"`, `"by_site"` or `"inputs"`. Note that, if `table`
#'   is provided, the output cannot be then fed into [plot_mqi_scatter()] and
#'   similar functions.
#'
#' @returns a list with at most the objects `summary` showing whole data
#'   summaries (e.g., `mqi_90`), `by_type` which shows per-fixed/indicative
#'   values, `by_site` which shows per-site values (e.g., `mqi`), and `inputs`
#'   which give the input parameters. The columns in each vary depending on the
#'   value of `term`. Just one of these can be selected using the `table`
#'   argument.
#'
#' @family model performance indicators
#'
#' @examples
#' summarise_mqo_stats(
#'   demo_longterm,
#'   pollutant = "PM10"
#' )
#'
#' summarise_mqo_stats(
#'   demo_shortterm,
#'   pollutant = "PM10"
#' )
#'
#' @author Jack Davison
#'
#' @export
summarise_mqo_stats <- function(
  data,
  pollutant,
  term = NULL,
  params_fixed = NULL,
  params_indicative = NULL,
  dict = mqor::mqo_dict(),
  table = NULL
) {
  pollutant <- rlang::arg_match(
    pollutant,
    unique(as.character(data[[dict$pollutant]]))
  )
  data <- data[data[[dict$pollutant]] == pollutant, ]

  data <-
    dplyr::filter(
      data,
      dplyr::if_else(
        all(is.na(.data[[dict$obs]])) | all(is.na(.data[[dict$mod]])),
        true = FALSE,
        false = TRUE
      ),
      .by = c(dict$site)
    )

  term <- determine_term(data, term, dict)

  if (missing(params_fixed)) {
    res <- determine_resolution(data, term, dict)

    params_fixed <-
      mqo_params_default(
        term = term,
        type = "fixed",
        pollutant = pollutant,
        resolution = res
      )
  }

  if (missing(params_indicative)) {
    res <- determine_resolution(data, term, dict)

    params_indicative <-
      mqo_params_default(
        term = term,
        type = "indicative",
        pollutant = pollutant,
        resolution = res
      )
  }

  data_splits <- split(data, data[[dict$type]])

  if (term == "short") {
    site_stats <- calculate_split_stats(
      fun = calc_shortterm_site_stats,
      data = data,
      data_splits = data_splits,
      term = term,
      params_fixed = params_fixed,
      params_indicative = params_indicative,
      obs = dict$obs,
      mod = dict$mod,
      site = dict$site,
      type = dict$type
    )

    group_stats <-
      site_stats |>
      dplyr::summarise(
        mqi_90 = mqo_percentile(.data$mqi, na.rm = TRUE),
        ti_bias_90 = mqo_percentile(abs(.data$ti_bias), na.rm = TRUE),
        ti_cor_90 = mqo_percentile(abs(.data$ti_cor), na.rm = TRUE),
        ti_sd_90 = mqo_percentile(abs(.data$ti_sd), na.rm = TRUE)
      ) |>
      dplyr::mutate(mqo = .data$mqi_90 <= 1, .after = "mqi_90")
  }

  if (term == "long") {
    site_stats <- calculate_split_stats(
      fun = calc_longterm_site_stats,
      data = data,
      data_splits = data_splits,
      term = term,
      params_fixed = params_fixed,
      params_indicative = params_indicative,
      obs = dict$obs,
      mod = dict$mod,
      site = dict$site,
      type = dict$type
    )

    type_stats <- calculate_split_stats(
      fun = calc_longterm_type_stats,
      data = data,
      data_splits = data_splits,
      term = term,
      params_fixed = params_fixed,
      params_indicative = params_indicative,
      obs = dict$obs,
      mod = dict$mod,
      site = dict$site,
      type = dict$type
    )

    group_stats <-
      dplyr::summarise(
        data,
        bias = vec_bias(
          obs = .data[[dict$obs]],
          mod = .data[[dict$mod]],
          na.rm = TRUE
        ),
        rmse = vec_rmse(
          obs = .data[[dict$obs]],
          mod = .data[[dict$mod]],
          na.rm = TRUE
        ),
        r = vec_cor(
          obs = .data[[dict$obs]],
          mod = .data[[dict$mod]],
          na.rm = TRUE
        ),
        sd_obs = vec_sd(
          x = .data[[dict$obs]],
          term = term,
          na.rm = TRUE
        ),
        sd_mod = vec_sd(
          x = .data[[dict$mod]],
          term = term,
          na.rm = TRUE
        )
      )

    uncer <- list()
    if ("fixed" %in% names(data_splits)) {
      uncer <-
        append(
          uncer,
          list(dplyr::mutate(
            data_splits$fixed,
            uncer = scalar_uncer(
              .data[[dict$obs]],
              term = term,
              params = params_fixed
            )
          ))
        )
    }
    if ("indicative" %in% names(data_splits)) {
      uncer <-
        append(
          uncer,
          list(
            dplyr::mutate(
              data_splits$indicative,
              uncer = scalar_uncer(
                .data[[dict$obs]],
                term = term,
                params = params_indicative
              )
            )
          )
        )
    }
    rmsu <-
      uncer |>
      dplyr::bind_rows() |>
      dplyr::summarise(rmsu = vec_rmsu_uncer(uncer, na.rm = TRUE))

    group_stats$rmsu <- rmsu$rmsu
    group_stats$mqi_90 <- mqo_percentile(site_stats$mqi, na.rm = TRUE)
    group_stats$mqo <- group_stats$mqi_90 <= 1
  }

  site_stats[[dict$site]] <- factor(
    site_stats[[dict$site]],
    unique(data[[dict$site]])
  )
  site_stats <- dplyr::arrange(site_stats, .data[[dict$site]])

  out <- list(summary = group_stats)
  if (term == "long") {
    out <- append(out, list(by_type = type_stats))
  }
  out <- append(out, list(by_site = site_stats))

  # rename for consistency
  for (i in seq_along(out)) {
    for (j in seq_along(dict)) {
      names(out[[i]])[names(out[[i]]) == dict[[j]]] <- names(dict)[[j]]
    }
  }

  # append input info
  out <- append(
    out,
    list(
      inputs = list(
        term = term,
        pollutant = pollutant,
        params_fixed = enframe_params(params_fixed),
        params_indicative = enframe_params(params_indicative)
      )
    )
  )

  # return specific table?
  if (!is.null(table)) {
    rlang::arg_match(table, names(out), multiple = FALSE)
    out <- out[[table]]
  } else {
    class(out) <- "mqor_stats"
  }

  return(out)
}

#' @noRd
enframe_params <- function(params) {
  tbl <- dplyr::as_tibble(params)
  if (nrow(tbl) == 0L) {
    tbl <- dplyr::tibble(
      rv = NA_real_,
      u_rv = NA_real_,
      a = NA_real_,
      b = NA_real_
    )
  }
  return(tbl)
}

#' Helper to take data splits from parent function, apply different parmas to
#' each, bind and return.
#' @noRd
calculate_split_stats <- function(
  fun,
  data,
  data_splits,
  term,
  params_fixed,
  params_indicative,
  obs,
  mod,
  site,
  type
) {
  site_stats <- list()

  if ("fixed" %in% names(data_splits)) {
    if (is.null(params_fixed)) {
      cli::cli_abort(
        "'fixed' data has been identified; please provide parameters via {.field params_fixed}."
      )
    }

    site_stats <-
      append(
        site_stats,
        list(
          fun(
            data = data_splits$fixed,
            params = params_fixed,
            term = term,
            obs = obs,
            mod = mod,
            site = site,
            type = type
          )
        )
      )
  }

  if ("indicative" %in% names(data_splits)) {
    if (is.null(params_indicative)) {
      cli::cli_abort(
        "'indicative' data has been identified; please provide parameters via {.field params_indicative}."
      )
    }

    site_stats <-
      append(
        site_stats,
        list(
          fun(
            data = data_splits$indicative,
            params = params_indicative,
            term = term,
            obs = obs,
            mod = mod,
            site = site,
            type = type
          )
        )
      )
  }

  site_stats <- dplyr::bind_rows(site_stats)

  return(site_stats)
}

#' Helper for short-term site stats
#' @noRd
calc_shortterm_site_stats <- function(
  data,
  term,
  params,
  obs,
  mod,
  site,
  type
) {
  dplyr::summarise(
    data,
    mean_obs = mean(.data[[obs]], na.rm = TRUE),
    mean_mod = mean(.data[[mod]], na.rm = TRUE),
    bias = vec_bias(
      obs = .data[[obs]],
      mod = .data[[mod]],
      na.rm = TRUE
    ),
    rmse = vec_rmse(
      obs = .data[[obs]],
      mod = .data[[mod]],
      na.rm = TRUE
    ),
    rmsu = vec_rmsu(
      obs = .data[[obs]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    r = vec_cor(
      obs = .data[[obs]],
      mod = .data[[mod]],
      na.rm = TRUE
    ),
    sd_obs = vec_sd(x = .data[[obs]], term = term, na.rm = TRUE),
    sd_mod = vec_sd(x = .data[[mod]], term = term, na.rm = TRUE),
    crmse = vec_crmse(
      obs = .data[[obs]],
      mod = .data[[mod]],
      na.rm = TRUE
    ),
    mqi = vec_mqi2(
      rmsu = .data$rmsu,
      err = abs(.data$rmse),
      b = params$b
    ),
    ti_bias = vec_pi_bias2(
      bias = .data$bias,
      rmsu = .data$rmsu,
      b = params$b
    ),
    ti_cor = vec_pi_cor2(
      cor = .data$r,
      sd_o = .data$sd_obs,
      sd_m = .data$sd_mod,
      rmsu = .data$rmsu,
      b = params$b
    ),
    ti_sd = vec_pi_sd2(
      sd_o = .data$sd_obs,
      sd_m = .data$sd_mod,
      rmsu = .data$rmsu,
      b = params$b
    ),
    ti_crmse = vec_ti_crmse2(
      mqi = .data$mqi,
      tib = .data$ti_bias
    ),
    .by = dplyr::all_of(c(site, type))
  ) |>
    dplyr::mutate(
      rmsu_star = .data$rmsu * sqrt(1 + params$b^2),
      .after = "rmsu"
    )
}

#' @noRd
calc_longterm_site_stats <- function(
  data,
  term,
  params,
  obs,
  mod,
  site,
  type
) {
  dplyr::summarise(
    data,
    obs = .data[[obs]],
    mod = .data[[mod]],
    uncer = scalar_uncer(
      obs = .data[[obs]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    ar_low = .data[[obs]] - sqrt(1 + params$b^2) * .data$uncer,
    ar_up = .data[[obs]] + sqrt(1 + params$b^2) * .data$uncer,
    mqi = vec_mqi(
      obs = .data[[obs]],
      mod = .data[[mod]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    .by = dplyr::all_of(c(site, type))
  )
}

#' @noRd
calc_longterm_type_stats <- function(
  data,
  term,
  params,
  obs,
  mod,
  site,
  type
) {
  dplyr::summarise(
    data,
    si_rmse = vec_si_rmse(
      obs = .data[[obs]],
      mod = .data[[mod]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    si_bias = vec_pi_bias(
      obs = .data[[obs]],
      mod = .data[[mod]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    si_cor = vec_pi_cor(
      obs = .data[[obs]],
      mod = .data[[mod]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    si_sd = vec_pi_sd(
      obs = .data[[obs]],
      mod = .data[[mod]],
      term = term,
      params = params,
      na.rm = TRUE
    ),
    .by = dplyr::all_of(type)
  )
}

#' Statistical performance indicators
#'
#' These functions are provided for convenient calculation of modelling the
#' listed model quality objectives performance indicators. All functions work on
#' numeric R vectors, containing modelled or measured concentrations at a single
#' sampling point, or averaged modelled or measured concentrations across
#' multiple sampling points.
#' - Bias ([vec_bias()])
#' - Root Mean Square Error ([vec_rmse()])
#' - Maximum Accepted Measurement Uncertainty ([scalar_uncer()])
#' - Root Mean Square Uncertainty ([vec_rmsu()])
#' - Pearson's Correlation coefficient ([vec_cor()])
#' - Standard Deviation ([vec_sd()])
#' - Centred Root Mean Square Error ([vec_crmse()])
#' - MQI ([vec_mqi()])
#'
#' @param obs,mod,x A numeric vector of observed (measured) concentrations
#'   (`obs`), modelled concentrations (`mod`), or either of the previous
#'   depending on desired output (`x`).
#' @param term Either `"short"` or `"long"`, identifying whether `data`
#'   represents Short- or Long-term data.
#' @param params A set of `mqor` parameters, most simply constructed using
#'   [mqo_params()]. See [mqo_params()] for more information about how
#'   parameter sets can be constructed.
#' @inheritParams base::mean
#'
#' @family model performance indicators
#'
#' @returns All `vec_` functions return a single numeric value. `scalar_`
#'   functions return a numeric vector the same length as `obs` or `mod`.
#'
#' @rdname statistical-performance-indicators
#' @order 1
#' @export
vec_bias <- function(obs, mod, na.rm = FALSE) {
  validate_equal(obs, mod)
  mean(mod, na.rm = na.rm) - mean(obs, na.rm = na.rm)
}

#' @rdname statistical-performance-indicators
#' @order 2
#' @export
vec_rmse <- function(obs, mod, na.rm = FALSE) {
  validate_equal(obs, mod)
  summed <- sum((obs - mod)^2, na.rm = na.rm)
  len <- length(obs)
  sqrt(summed / len)
}

#' @rdname statistical-performance-indicators
#' @order 3
#' @export
scalar_uncer <- function(obs, term, params = mqo_params(), na.rm = FALSE) {
  term <- validate_term(term)
  params$u_rv * sqrt((1 - params$a^2) * obs^2 + (params$a^2 * params$rv^2))
}

#' @rdname statistical-performance-indicators
#' @order 4
#' @export
vec_rmsu <- function(obs, term, params = mqo_params(), na.rm = TRUE) {
  term <- validate_term(term)
  uncer <- scalar_uncer(obs = obs, term = term, params = params, na.rm = na.rm)
  vec_rmsu_uncer(uncer, na.rm)
}

#' Non-exported version of vec_rmsu that takes an uncertainty vector
#' Needed for mixed type networks
#' @noRd
vec_rmsu_uncer <- function(uncer, na.rm) {
  len <- length(uncer)
  sqrt(sum(uncer^2, na.rm = na.rm) / len)
}

#' @rdname statistical-performance-indicators
#' @order 5
#' @export
vec_cor <- function(obs, mod, na.rm = FALSE) {
  validate_equal(obs, mod)

  m_diff <- mod - mean(mod, na.rm = na.rm)
  o_diff <- obs - mean(obs, na.rm = na.rm)
  top <- sum(m_diff * o_diff, na.rm = na.rm)

  m_bottom <- sqrt(sum((mod - mean(mod, na.rm = na.rm))^2, na.rm = na.rm))
  o_bottom <- sqrt(sum((obs - mean(obs, na.rm = na.rm))^2, na.rm = na.rm))

  top / (m_bottom * o_bottom)
}

#' @rdname statistical-performance-indicators
#' @order 6
#' @export
vec_sd <- function(x, term, na.rm = FALSE) {
  term <- validate_term(term)
  summed <- sum((x - mean(x, na.rm = na.rm))^2, na.rm = na.rm)
  len <- length(x)
  if (term == "long") {
    len <- len - 1L
  }
  sqrt(summed / len)
}

#' @rdname statistical-performance-indicators
#' @order 7
#' @export
vec_crmse <- function(obs, mod, na.rm = FALSE) {
  validate_equal(obs, mod)
  m_diff <- mod - mean(mod, na.rm = na.rm)
  o_diff <- obs - mean(obs, na.rm = na.rm)
  diffs <- m_diff - o_diff
  diffs_summed <- sum(diffs^2, na.rm = na.rm)
  len <- length(obs)
  sqrt(diffs_summed / len)
}

#' @rdname statistical-performance-indicators
#' @order 8
#' @export
vec_mqi <- function(obs, mod, term, params = mqo_params(), na.rm = FALSE) {
  validate_equal(obs, mod)
  term <- validate_term(term)

  rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)

  if (term == "short") {
    top <- vec_rmse(obs = obs, mod = mod, na.rm = na.rm)
  } else {
    top <- abs(vec_bias(
      obs = obs,
      mod = mod,
      na.rm = na.rm
    ))
  }

  fct <- sqrt(1 + params$b^2)
  top / (fct * rmsu)
}

#' Calculate the MQI from existing RMSU & error
#' @noRd
vec_mqi2 <- function(rmsu, err, b) {
  fct <- sqrt(1 + b^2)
  err / (fct * rmsu)
}

#' Complementary performance indicators
#'
#' These functions complement the MQI/MQO procedure outlined using [vec_mqi()],
#' and are used in internally by [plot_mqi_scatter()].
#'
#' The following indicators are available for both long-term and short-term
#' data:
#' - Bias performance indicator ([vec_pi_bias()])
#' - Correlation performance indicator ([vec_pi_cor()])
#' - Standard deviation performance indicator ([vec_pi_sd()])
#'
#' The following 'temporal' (short-term) indicators are available:
#' - Centred Root Mean Square Error temporal indicator ([vec_ti_crmse()])
#'
#' The following 'spatial' (long-term) indicators are available:
#' - Root Mean Square Error spatial indicator ([vec_si_rmse()])
#'
#' @param obs,mod A numeric vector of observed (measured) concentrations (`obs`)
#'   or modelled concentrations (`mod`).
#'
#' @inheritParams vec_bias
#'
#' @family model performance indicators
#'
#' @rdname complementary-performance-indicators
#' @order 1
#' @export
vec_pi_bias <- function(obs, mod, term, params, na.rm = FALSE) {
  validate_equal(obs, mod)
  term <- validate_term(term)

  bias <- vec_bias(obs = obs, mod = mod, na.rm = na.rm)

  rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)

  beta <- sqrt(1 + params$b^2)

  bias / (beta * rmsu)
}

#' @noRd
vec_pi_bias2 <- function(bias, rmsu, b) {
  bias / (sqrt(1 + b^2) * rmsu)
}

#' @rdname complementary-performance-indicators
#' @order 2
#' @export
vec_pi_cor <- function(obs, mod, term, params, na.rm = FALSE) {
  validate_equal(obs, mod)
  term <- validate_term(term)
  R <- vec_cor(obs, mod, na.rm = na.rm)
  sd_o <- vec_sd(obs, term, na.rm = na.rm)
  sd_m <- vec_sd(mod, term, na.rm = na.rm)
  rmsu2 <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)^2
  beta <- 1 + params$b^2
  (1 - R) * ((2 * sd_o * sd_m) / (beta * rmsu2))
}

#' @noRd
vec_pi_cor2 <- function(cor, sd_o, sd_m, rmsu, b) {
  beta <- 1 + b^2
  rmsu2 <- rmsu^2
  (1 - cor) * ((2 * sd_o * sd_m) / (beta * rmsu2))
}

#' @rdname complementary-performance-indicators
#' @order 3
#' @export
vec_pi_sd <- function(obs, mod, term, params, na.rm = FALSE) {
  validate_equal(obs, mod)
  term <- validate_term(term)
  sd_o <- vec_sd(obs, term, na.rm = na.rm)
  sd_m <- vec_sd(mod, term, na.rm = na.rm)
  rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)
  beta <- sqrt(1 + params$b^2)
  (sd_m - sd_o) / (beta * rmsu)
}

#' @noRd
vec_pi_sd2 <- function(sd_o, sd_m, rmsu, b) {
  beta <- sqrt(1 + b^2)
  (sd_m - sd_o) / (beta * rmsu)
}

#' @rdname complementary-performance-indicators
#' @order 4
#' @export
vec_ti_crmse <- function(obs, mod, term, params, na.rm = FALSE) {
  validate_equal(obs, mod)
  term <- validate_term(term)
  if (term == "long") {
    cli::cli_abort(
      "This performance indicator is not supported for long-term data."
    )
  }

  mqi <- vec_mqi(
    obs = obs,
    mod = mod,
    term = term,
    params = params,
    na.rm = na.rm
  )

  tib <- vec_pi_bias(
    obs = obs,
    mod = mod,
    term = term,
    params = params,
    na.rm = na.rm
  )

  sqrt(mqi^2 - tib^2)
}

#' @noRd
vec_ti_crmse2 <- function(mqi, tib) {
  sqrt(mqi^2 - tib^2)
}

#' @rdname complementary-performance-indicators
#' @order 5
#' @export
vec_si_rmse <- function(obs, mod, term, params, na.rm = FALSE) {
  validate_equal(obs, mod)
  term <- validate_term(term)

  if (term == "short") {
    cli::cli_abort(
      "This performance indicator is not supported for short-term data."
    )
  }

  rmse <- vec_rmse(obs = obs, mod = mod, na.rm = na.rm)

  rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)

  beta <- sqrt(1 + params$b^2)

  rmse / (beta * rmsu)
}

#' Function to ensure vectors are of equal length
#' @noRd
validate_equal <- function(obs, mod) {
  if (length(mod) != length(obs)) {
    cli::cli_abort(
      "The length of observed values, obs, must be equal to the length of the modelled values, mod."
    )
  }
}

#' Check term is appropriate
#' @noRd
validate_term <- function(x) {
  rlang::arg_match(x, c("short", "long"))
}

#' Check 'type' col only has either 'fixed' or 'indicative'
#' @noRd
validate_type <- function(x) {
  if (any(!x %in% c("fixed", "indicative"))) {
    cli::cli_abort(
      "The {.field type} column should only contain either 'fixed' or 'indicative'."
    )
  }
}

#' Plot a scatter chart visualising the Modelling Quality Indicator (MQI)
#'
#' This function produces a plot comparing modelled and measured concentrations.
#' For long-term data, a traditional scatter plot of observed vs modelled
#' concentrations is plotted with a shaded area showing the acceptability range
#' and dashed boundaries showing the maximum accepted measurement uncertainty.
#' For short-term data, a 'bullseye' scatter plot is plotted showing two
#' complementary performance indicators with a shaded area showing the MQO
#' fulfilment area and a dashed boundary showing the maximum accepted
#' measurement uncertainty.
#'
#' @inheritParams plot_comparison_bars
#'
#' @param stats_shortterm,stats_longterm The output of [summarise_mqo_stats()].
#'   All relevant information (e.g., `term`, `params_fixed`, etc.) will be
#'   passed to this function. `stats_shortterm` should contain short-term
#'   (temporal) statistics, `stats_longterm` should contain long-term (spatial)
#'   statistics. Either of these can be `NULL`, but at least one should be
#'   provided.
#'
#' @param show_dashed Should extra dashed lines be shown on the figures? These
#'   show the maximum accepted measurement uncertainty, but can overcrowd the
#'   plotting area.
#'
#' @param color_fixed,color_indicative,color_outline The colours to use for
#'   fixed data, indicative data, and any another annotations. Can be expressed
#'   as hex codes, or any colours listed in [colors()].
#'
#' @param gp A [grid::gpar()] object passed to [grid::textGrob()], used to
#'   control the appearance of the parameter & indicator annotations. `fontsize`
#'   is likely the most useful argument, but many options are available through
#'   [grid::gpar()]. Only applies when `interactive = FALSE`.
#'
#' @family plotting functions
#'
#' @author Jack Davison
#'
#' @examples
#' long <- summarise_mqo_stats(demo_longterm, pollutant = "PM10")
#'
#' short <- summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
#'
#' plot_mqi_scatter(stats_shortterm = short)
#'
#' plot_mqi_scatter(stats_shortterm = short, stats_longterm = long)
#'
#' plot_mqi_scatter(stats_longterm = long)
#'
#' @export
plot_mqi_scatter <- function(
  stats_shortterm = NULL,
  stats_longterm = NULL,
  show_dashed = FALSE,
  color_fixed = "#4269D0",
  color_indicative = "#EFB118",
  color_outline = "black",
  interactive = FALSE,
  gp = grid::gpar(fontsize = 10)
) {
  validate_mqor_stats(stats_shortterm)
  validate_mqor_stats(stats_longterm)
  if (!is.null(stats_shortterm)) {
    term <- "short"
    params_fixed <- stats_shortterm$inputs$params_fixed
    params_indicative <- stats_shortterm$inputs$params_indicative

    if (!"fixed" %in% stats_shortterm$by_site$type) {
      params_fixed <- NULL
    }
    if (!"indicative" %in% stats_shortterm$by_site$type) {
      params_indicative <- NULL
    }

    uncer <-
      dplyr::bind_rows(
        "fixed" = make_circle(1 / sqrt(1 + params_fixed$b^2)),
        "indicative" = make_circle(1 / sqrt(1 + params_indicative$b^2)),
        .id = "type"
      )

    res <- paste0(
      round(stats_shortterm$summary$mqi_90, 2),
      " (",
      ifelse(stats_shortterm$summary$mqo, "Pass", "Fail"),
      ")"
    )

    if (!is.null(stats_longterm)) {
      res2 <- paste0(
        round(stats_longterm$summary$mqi_90, 2),
        " (",
        ifelse(stats_longterm$summary$mqo, "Pass", "Fail"),
        ")"
      )
    }

    if (interactive) {
      if (!is.null(stats_longterm)) {
        html <-
          glue::glue(
            "
            <b>MQI<sub>{term},90th</sub> = {res}</b><br>
            <b>MQI<sub>long,90th</sub> = {res2}</b><br>
            &#945;<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br>
            &#946;<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br>
            RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br>
            U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>
            "
          )
      } else {
        html <-
          glue::glue(
            "<b>MQI<sub>{term},90th</sub> = {res}</b><br>
        &#945;<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br>
        &#946;<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br>
        RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br>
        U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>"
          )
      }

      html2 <-
        glue::glue(
          '
          TI<sub>B</sub> = {fmt_pi(stats_shortterm$summary, "ti_bias_90")}<br>
          TI<sub>R</sub> = {fmt_pi(stats_shortterm$summary, "ti_cor_90")}<br>
          TI<sub>SD</sub> = {fmt_pi(stats_shortterm$summary, "ti_sd_90")}<br>
          '
        )

      # get outline color RGB - for the highlight area
      color_outline_rgb <- grDevices::col2rgb(col = color_outline)

      plt <-
        plotly::plot_ly(
          colors = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          )
        ) |>
        plotly::add_ribbons(
          data = make_circle(1),
          x = ~x,
          ymin = ~ymin,
          ymax = ~ymax,
          color = I(color_outline),
          fillcolor = grDevices::rgb(
            red = color_outline_rgb[["red", 1]],
            green = color_outline_rgb[["green", 1]],
            blue = color_outline_rgb[["blue", 1]],
            maxColorValue = 255,
            alpha = 255 * 0.05
          ),
          showlegend = FALSE,
          hoverinfo = "skip"
        )

      if (show_dashed) {
        plt <-
          plotly::add_ribbons(
            p = plt,
            data = uncer,
            x = ~x,
            ymin = ~ymin,
            ymax = ~ymax,
            color = ~type,
            line = list(dash = "dot"),
            fillcolor = grDevices::rgb(0, 0, 0, 0),
            legendgroup = ~type,
            showlegend = FALSE
          )
      }

      plt <-
        plt |>
        plotly::add_markers(
          x = stats_shortterm$by_site$ti_crmse,
          y = stats_shortterm$by_site$ti_bias,
          color = stats_shortterm$by_site$type,
          text = stats_shortterm$by_site$site,
          legendgroup = stats_shortterm$by_site$type
        ) |>
        plotly::layout(
          xaxis = list(
            range = range(c(-2, 2)),
            constrain = "domain",
            title = "TI<sub>CRMSE</sub>"
          ),
          yaxis = list(
            range = range(c(-2, 2)),
            constrain = "domain",
            scaleanchor = "x",
            scaleratio = 1,
            title = "TI<sub>BIAS</sub>"
          ),
          legend = list(
            orientation = "h",
            y = 1.1
          )
        ) |>
        plotly::add_annotations(
          text = html,
          y = 0.8,
          x = 0.25,
          yref = "y domain",
          xref = "x domain",
          align = "left",
          valign = "top"
        ) |>
        plotly::add_annotations(
          text = html2,
          y = 0.1,
          x = 0.9,
          yref = "y domain",
          xref = "x domain",
          align = "right",
          valign = "bottom"
        )
    } else {
      sub <- paste(term, "90th", sep = ",")

      grob_mqi <- grid::textGrob(bquote(MQI[.(sub)] == .(res)), gp = gp)

      if (!is.null(stats_longterm)) {
        sub2 <- paste("long", "90th", sep = ",")
        grob_mqi2 <- grid::textGrob(bquote(MQI[.(sub2)] == .(res2)), gp = gp)
      } else {
        grob_mqi2 <- grid::textGrob("")
      }

      grob_a <- grid::textGrob(
        bquote(
          alpha[.(term)] ==
            .(combine(
              params_fixed$a,
              params_indicative$a
            ))
        ),
        gp = gp
      )
      grob_b <- grid::textGrob(
        bquote(
          beta[.(term)] == .(combine(params_fixed$b, params_indicative$b))
        ),
        gp = gp
      )
      grob_rv <- grid::textGrob(
        bquote(
          RV[.(term)] == .(combine(params_fixed$rv, params_indicative$rv))
        ),
        gp = gp
      )
      grob_urv <- grid::textGrob(
        bquote(
          U[Or](RV[.(term)]) ==
            .(combine(params_fixed$u_rv, params_indicative$u_rv, TRUE))
        ),
        gp = gp
      )

      grob_tib <- grid::textGrob(
        bquote(TI[B] == .(fmt_pi(stats_shortterm$summary, "ti_bias_90"))),
        gp = gp
      )
      grob_tir <- grid::textGrob(
        bquote(TI[R] == .(fmt_pi(stats_shortterm$summary, "ti_cor_90"))),
        gp = gp
      )
      grob_tis <- grid::textGrob(
        bquote(TI[SD] == .(fmt_pi(stats_shortterm$summary, "ti_sd_90"))),
        gp = gp
      )

      plt <-
        ggplot2::ggplot(stats_shortterm$by_site) +
        ggplot2::geom_ribbon(
          data = make_circle(1),
          ggplot2::aes(
            x = .data$x,
            ymin = .data$ymin,
            ymax = .data$ymax
          ),
          fill = color_outline,
          color = color_outline,
          alpha = 0.05
        )

      if (show_dashed) {
        plt <-
          plt +
          ggplot2::geom_ribbon(
            data = uncer,
            ggplot2::aes(
              x = .data$x,
              ymin = .data$ymin,
              ymax = .data$ymax,
              color = .data$type
            ),
            fill = NA,
            lty = 2,
            show.legend = dplyr::n_distinct(stats_shortterm$by_site$type) > 1L
          )
      }

      plt <- plt +
        ggplot2::geom_hline(yintercept = 0) +
        ggplot2::geom_vline(xintercept = 0) +
        ggplot2::geom_point(
          ggplot2::aes(
            x = .data$ti_crmse,
            y = .data$ti_bias,
            color = .data$type
          ),
          show.legend = dplyr::n_distinct(stats_shortterm$by_site$type) > 1L
        ) +
        ggplot2::coord_equal() +
        ggplot2::scale_x_continuous(
          expand = ggplot2::expansion(),
          limits = c(-2, 2)
        ) +
        ggplot2::scale_y_continuous(
          expand = ggplot2::expansion(),
          limits = c(-2, 2)
        ) +
        ggplot2::theme_bw(14) +
        ggplot2::labs(
          x = expression(TI[CRMSE]),
          y = expression(TI[BIAS]),
          color = NULL
        ) +
        ggplot2::theme(
          legend.justification = "left",
          legend.spacing = ggplot2::unit(0.2, "cm"),
          legend.margin = ggplot2::margin(0, 0, 0, 0, unit = "cm"),
          legend.title = ggplot2::element_text(face = "bold")
        ) +
        ggplot2::scale_color_manual(
          values = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          ),
          aesthetics = c("fill", "colour")
        ) +
        ggplot2::guides(
          fill = ggplot2::guide_legend(position = "top"),
          custom_mqi = ggplot2::guide_custom(grob_mqi, order = 1),
          custom_mqi2 = ggplot2::guide_custom(grob_mqi2, order = 2),
          custom_a = ggplot2::guide_custom(
            grob_a,
            order = 3,
            title = "\nUncertainty\nParameters"
          ),
          custom_b = ggplot2::guide_custom(grob_b, order = 4),
          custom_rv = ggplot2::guide_custom(grob_rv, order = 5),
          custom_urv = ggplot2::guide_custom(grob_urv, order = 6),
          custom_tib = ggplot2::guide_custom(
            grob_tib,
            order = 7,
            title = "\nComplementary\nIndicators"
          ),
          custom_tir = ggplot2::guide_custom(grob_tir, order = 8),
          custom_tis = ggplot2::guide_custom(grob_tis, order = 9)
        )
    }
  } else if (!is.null(stats_longterm)) {
    term <- "long"
    params_fixed <- stats_longterm$inputs$params_fixed
    params_indicative <- stats_longterm$inputs$params_indicative

    max_om <- max(
      c(stats_longterm$by_site$obs, stats_longterm$by_site$mod),
      na.rm = TRUE
    )

    build_uncer_bands <- function(params) {
      if (is.null(params)) {
        return(data.frame(
          obs = numeric(),
          u = numeric(),
          ar = numeric()
        ))
      }
      range_o <- pretty(0:max_om)
      diff_o <- diff(range_o[1:2])

      uncer <-
        data.frame(obs = pretty(n = 100, (0 - diff_o):(max_om + diff_o))) |>
        dplyr::rowwise() |>
        dplyr::mutate(
          u = vec_rmsu(
            .data$obs,
            term = term,
            params = params,
            na.rm = TRUE
          ),
          ar = sqrt(1 + params$b^2) * .data$u
        ) |>
        dplyr::ungroup()

      return(uncer)
    }

    if (!"fixed" %in% stats_longterm$by_site$type) {
      params_fixed <- NULL
    }
    if (!"indicative" %in% stats_longterm$by_site$type) {
      params_indicative <- NULL
    }

    uncer <-
      dplyr::bind_rows(
        "fixed" = build_uncer_bands(params_fixed),
        "indicative" = build_uncer_bands(params_indicative),
        .id = "type"
      )

    res <- paste0(
      round(stats_longterm$summary$mqi_90, 2),
      " (",
      ifelse(stats_longterm$summary$mqo, "Pass", "Fail"),
      ")"
    )

    if (interactive) {
      html <-
        glue::glue(
          "<b>MQI<sub>{term},90th</sub> = {res}</b><br>
        &#945;<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br>
        &#946;<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br>
        RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br>
        U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>"
        )

      html2 <-
        glue::glue(
          '
          SI<sub>RMSE</sub> = {fmt_pi(stats_longterm$by_type, "si_rmse")}<br>
          SI<sub>B</sub> = {fmt_pi(stats_longterm$by_type, "si_bias")}<br>
          SI<sub>R</sub> = {fmt_pi(stats_longterm$by_type, "si_cor")}<br>
          SI<sub>SD</sub> = {fmt_pi(stats_longterm$by_type, "si_sd")}<br>
          '
        )

      plt <-
        plotly::plot_ly(
          colors = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          )
        ) |>
        plotly::add_lines(
          x = ceiling(c(0, max_om)),
          y = ceiling(c(0, max_om)),
          color = I(color_outline),
          hoverinfo = "skip",
          showlegend = FALSE
        ) |>
        plotly::add_ribbons(
          data = uncer,
          x = ~obs,
          ymin = ~ obs - ar,
          ymax = ~ obs + ar,
          color = ~type,
          legendgroup = ~type,
          opacity = 0.5
        )

      if (show_dashed) {
        plt <-
          plotly::add_ribbons(
            p = plt,
            data = uncer,
            x = ~obs,
            ymin = ~ obs - u,
            ymax = ~ obs + u,
            color = ~type,
            line = list(dash = "dot"),
            fillcolor = grDevices::rgb(0, 0, 0, 0),
            legendgroup = ~type,
            opacity = 0.5,
            showlegend = FALSE
          )
      }

      plt <- plt |>
        plotly::add_markers(
          x = stats_longterm$by_site$obs,
          y = stats_longterm$by_site$mod,
          color = stats_longterm$by_site$type,
          text = stats_longterm$by_site$site
        ) |>
        plotly::layout(
          xaxis = list(
            range = range(pretty(c(0, max_om))),
            constrain = "domain",
            title = "observed concentrations (&mu;g&nbsp;m<sup>-3</sup>)"
          ),
          yaxis = list(
            range = range(pretty(c(0, max_om))),
            constrain = "domain",
            scaleanchor = "x",
            scaleratio = 1,
            title = "modelled concentrations (&mu;g&nbsp;m<sup>-3</sup>)"
          ),
          legend = list(
            orientation = "h",
            y = 1.1
          )
        ) |>
        plotly::add_annotations(
          text = html,
          y = 0.8,
          x = 0.25,
          yref = "y domain",
          xref = "x domain",
          align = "left",
          valign = "top"
        ) |>
        plotly::add_annotations(
          text = html2,
          y = 0.1,
          x = 0.9,
          yref = "y domain",
          xref = "x domain",
          align = "right",
          valign = "bottom"
        )
    } else {
      sub <- paste(term, "90th", sep = ",")

      grob_mqi <- grid::textGrob(bquote(MQI[.(sub)] == .(res)), gp = gp)

      grob_a <- grid::textGrob(
        bquote(
          alpha[.(term)] == .(combine(params_fixed$a, params_indicative$a))
        ),
        gp = gp
      )
      grob_b <- grid::textGrob(
        bquote(
          beta[.(term)] == .(combine(params_fixed$b, params_indicative$b))
        ),
        gp = gp
      )
      grob_rv <- grid::textGrob(
        bquote(
          RV[.(term)] == .(combine(params_fixed$rv, params_indicative$rv))
        ),
        gp = gp
      )
      grob_urv <- grid::textGrob(
        bquote(
          U[Or](RV[.(term)]) ==
            .(combine(params_fixed$u_rv, params_indicative$u_rv, TRUE))
        ),
        gp = gp
      )

      grob_siu <- grid::textGrob(
        bquote(SI[RMSE] == .(fmt_pi(stats_longterm$by_type, "si_rmse"))),
        gp = gp
      )
      grob_sib <- grid::textGrob(
        bquote(SI[B] == .(fmt_pi(stats_longterm$by_type, "si_bias"))),
        gp = gp
      )
      grob_sir <- grid::textGrob(
        bquote(SI[R] == .(fmt_pi(stats_longterm$by_type, "si_cor"))),
        gp = gp
      )
      grob_sis <- grid::textGrob(
        bquote(SI[SD] == .(fmt_pi(stats_longterm$by_type, "si_sd"))),
        gp = gp
      )

      plt <-
        stats_longterm$by_site |>
        ggplot2::ggplot(ggplot2::aes(
          x = .data$obs,
          y = .data$mod
        )) +
        ggplot2::coord_equal(xlim = c(0, max_om), ylim = c(0, max_om)) +
        ggplot2::geom_ribbon(
          data = uncer,
          ggplot2::aes(
            ymin = .data$obs - .data$ar,
            ymax = .data$obs + .data$ar,
            x = .data$obs,
            fill = .data$type
          ),
          inherit.aes = F,
          show.legend = dplyr::n_distinct(stats_longterm$by_site$type) > 1L,
          alpha = 0.15
        ) +
        ggplot2::geom_abline(color = color_outline) +
        ggplot2::geom_line(
          data = uncer,
          ggplot2::aes(
            x = .data$obs,
            y = .data$obs + .data$ar,
            color = .data$type
          ),
          show.legend = FALSE
        ) +
        ggplot2::geom_line(
          data = uncer,
          ggplot2::aes(
            x = .data$obs,
            y = .data$obs - .data$ar,
            color = .data$type
          ),
          show.legend = FALSE
        )

      if (show_dashed) {
        plt <-
          plt +
          ggplot2::geom_line(
            data = uncer,
            ggplot2::aes(
              x = .data$obs,
              y = .data$obs + .data$u,
              color = .data$type
            ),
            show.legend = FALSE,
            lty = 2
          ) +
          ggplot2::geom_line(
            data = uncer,
            ggplot2::aes(
              x = .data$obs,
              y = .data$obs - .data$u,
              color = .data$type
            ),
            show.legend = FALSE,
            lty = 2
          )
      }

      plt <-
        plt +
        ggplot2::geom_point(
          size = 3,
          ggplot2::aes(color = .data$type),
          show.legend = FALSE
        ) +
        ggplot2::scale_x_continuous(expand = ggplot2::expansion(c(0, .1))) +
        ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) +
        ggplot2::theme_bw(14) +
        ggplot2::labs(
          x = format_text("observed concentrations (ug/m3)"),
          y = format_text("modelled concentrations (ug/m3)"),
          fill = NULL,
          color = NULL
        ) +
        ggplot2::theme(
          legend.justification = "left",
          legend.spacing = ggplot2::unit(0.2, "cm"),
          legend.margin = ggplot2::margin(0, 0, 0, 0, unit = "cm"),
          legend.title = ggplot2::element_text(face = "bold")
        ) +
        ggplot2::scale_color_manual(
          values = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          ),
          aesthetics = c("fill", "colour")
        ) +
        ggplot2::guides(
          fill = ggplot2::guide_legend(position = "top"),
          custom_mqi = ggplot2::guide_custom(grob_mqi, order = 1),
          custom_a = ggplot2::guide_custom(
            grob_a,
            order = 2,
            title = "\nUncertainty\nParameters"
          ),
          custom_b = ggplot2::guide_custom(grob_b, order = 3),
          custom_rv = ggplot2::guide_custom(grob_rv, order = 4),
          custom_urv = ggplot2::guide_custom(grob_urv, order = 5),
          custom_siu = ggplot2::guide_custom(
            grob_siu,
            order = 6,
            title = "\nComplementary\nIndicators"
          ),
          custom_sib = ggplot2::guide_custom(grob_sib, order = 7),
          custom_sir = ggplot2::guide_custom(grob_sir, order = 8),
          custom_sis = ggplot2::guide_custom(grob_sis, order = 9)
        )
    }
  }

  return(plt)
}

#' @author 'Trevor' on StackOverflow
#' @source https://stackoverflow.com/questions/6862742/draw-a-circle-with-ggplot2
#' @noRd
make_circle <- function(r, xc = 0, yc = 0) {
  x <- xc + r * cos(seq(0, pi, length.out = 100))
  ymax <- yc + r * sin(seq(0, pi, length.out = 100))
  ymin <- yc + r * sin(seq(0, -pi, length.out = 100))
  data.frame(x = x, ymin = ymin, ymax = ymax)
}

#' Comma delimit two optional values
#' @noRd
combine <- function(a, b, percentage = FALSE) {
  if (percentage) {
    if (!is.null(a)) {
      a <- paste0(a * 100, "%")
    }
    if (!is.null(b)) {
      b <- paste0(b * 100, "%")
    }
  }
  paste(c(a, b) |> unique(), collapse = ", ")
}

#' Format performance indicators
#' @noRd
fmt_pi <- function(vec, x) {
  paste(round(vec[[x]], 2), collapse = ", ")
}

#' Conveniently construct a set of parameters for use in
#' [mqor][mqor-package] functions
#'
#' This is a convenience function for assembling complete sets of parameters for
#' use across `mqor`. It allows parameter sets to be simply re-used across
#' functions. This function simply returns an R list which is indexed by
#' `mqor` functions, meaning that users could construct the themselves, or even
#' pass a row of a dataframe in its stead.
#'
#' @param rv **Reference value.** A numeric value, used to define the
#'   relationship between measurement uncertainty and concentration level, for
#'   each pollutant and time aggregation. `rv` is expressed in the same units as
#'   the concentration of the pollutant.
#'
#' @param u_rv **Maximum accepted relative measurement uncertainty.** A numeric
#'   value, expressed as a percent (`0.1` is equivalent to 10%).
#'
#' @param a **The alpha parameter.** Fraction of the maximum accepted
#'   measurement uncertainty that is not proportional to the concentration,
#'   ranging between `0` and `1`. It is pollutant and measurement-type specific.
#'
#' @param b **The beta parameter.** Ratio between the maximum accepted modelling
#'   and measurement uncertainties for short- and long-term averages. It is
#'   pollutant and measurement-type specific, and it determines the stringency
#'   of the MQO. The stringency factor β corresponds to the maximum ratio of
#'   uncertainty of modelling applications as specified in the AAQD.
#'
#' @returns a list of length `5`, with names `term`, `rv`, `u_rv`, `a`, and `b`.
#'
#' @family parameter constructors
#'
#' @export
mqo_params <- function(
  rv = NULL,
  u_rv = NULL,
  a = NULL,
  b = NULL
) {
  if (
    is.null(rv) ||
      is.null(u_rv) ||
      is.null(a) ||
      is.null(b)
  ) {
    cli::cli_abort(
      c(
        "i" = "Please pass a complete set of analysis parameters using {.fun mqor::mqo_params} to the {.field params} argument."
      ),
      call = NULL
    )
  }

  list(
    rv = rv,
    u_rv = u_rv,
    a = a,
    b = b
  )
}

#' Access 'default' parameters for use in [mqor][mqor-package] functions
#'
#' This function provides an alternative to [mqo_params()] by allowing users to
#' simply filter the [default_params] dataset to create a [mqo_params()]-like
#' output suitable for the `params` argument of other `mqor` functions.
#'
#' @param term,type,pollutant Any of the options present in
#'   `default_params$term`, `default_params$type` or `default_params$pollutant`.
#'   Used to filter the [default_params] dataset.
#'
#' @param resolution An optional value present in `default_params$time`, used to
#'   distinguish between parameters when multiple short-term time averages are
#'   present.
#'
#' @returns a list of length `5`, with names `term`, `rv`, `u_rv`, `a`, and `b`.
#'
#' @family parameter constructors
#'
#' @export
mqo_params_default <- function(
  term = "",
  type = "",
  pollutant = "",
  resolution = NULL
) {
  i_term <- rlang::arg_match(term, unique(mqor::default_params$term))
  i_type <- rlang::arg_match(type, unique(mqor::default_params$type))
  pollutant <- tolower(pollutant)
  pollutant[pollutant == "pm25"] <- "pm2.5"

  if (!missing(pollutant) && !pollutant %in% mqor::default_params$pollutant) {
    cli::cli_abort(
      "Cannot identify {.field pollutant}; please use {.fun mqor::mqo_params} or {.fun mqor::mqo_params_default} to construct a parameter set."
    )
  }

  i_pollutant <- rlang::arg_match(
    pollutant,
    unique(mqor::default_params$pollutant)
  )

  out <-
    dplyr::filter(
      mqor::default_params,
      term == i_term,
      type == i_type,
      pollutant == i_pollutant
    )

  if (nrow(out) > 1) {
    if (is.null(resolution)) {
      cli::cli_abort(
        "Multiple possible parameters; please provide one of {.code {out$time}} to {.field resolution}."
      )
    } else {
      rlang::arg_match(resolution, out$time)
      out <- dplyr::filter(out, .data$time == resolution)
    }
  }

  mqo_params(
    rv = out$rv,
    u_rv = out$u_rv,
    a = out$a,
    b = out$b
  )
}

#' Convenience function for labelling columns in input `data`
#'
#' This function provides an interface to easily create a 'data dictionary' so
#' that `{mqor}` functions can correctly identify where observed values,
#' modelled values, site IDs, etc. are found in an input `data.frame`.
#'
#' @param obs,mod A character string to identify the columns associated with the
#'   observed (measured) (`obs`) and modelled (`mod`) concentrations. These will
#'   be used alongside `params` to calculate relevant metrics.
#'
#' @param site A character string to identify the column to group `obs` and
#'   `mod` by when calculating statistics. This does not strictly have to be a
#'   site name; a site ID or abbreviation would suffice, as long as the labels
#'   uniquely define separate sampling points.
#'
#' @param type A character string to identify the column to assign sites as
#'   either `"fixed"` or `"indicative"`.
#'
#' @param pollutant A character string to identify the column which
#'   distinguishes between different pollutant species. This column will be used
#'   to filter the data before it is calculated.
#'
#' @param date A character to identify a `POSIXct`/`Date` column. Only relevant
#'   for short-term data.
#'
#' @returns a list of length `5`, with names `obs`, `mod`, `site`, `type`,
#'   `pollutant` and `date`.
#'
#' @export
mqo_dict <- function(
  obs = "obs",
  mod = "mod",
  site = "site",
  type = "type",
  pollutant = "param",
  date = "date"
) {
  list(
    obs = obs,
    mod = mod,
    site = site,
    type = type,
    pollutant = pollutant,
    date = date
  )
}

#' Read Short- and Long-Term data in the recommended MQOR data structure
#'
#' This function reads one or more data files as well as an optional attributes
#' file, combines them together, and formats them in a way useful for the
#' `{mqor}` package. Short- and Long-Term data will be distinguished using the
#' presence of the `"Month"` column, which should only exist in short-term data.
#'
#' @param file_data Path(s) to one or more CSV files containing short- or
#'   long-term data, formatted as requested by the DELTA tool. A mix of short-
#'   and long- term data will fail.
#'
#' @param file_attributes An optional path to an 'attributes' CSV file, which
#'   must contain a `"Time_Series"` column used to join the two datasets
#'   together. The attributes data can contain metadata about the main datasets,
#'   such as the name of the model, monitoring station coordinates, and so on.
#'
#' @param delim The delimiter used to separate columns in the input files. The
#'   default is `";"`.
#'
#' @author Jack Davison
#'
#' @export
read_mqor <- function(
  file_data,
  file_attributes = NULL,
  delim = ";"
) {
  # read all data files
  df <-
    purrr::map(
      file_data,
      \(x) {
        data.table::fread(x, sep = delim) |>
          dplyr::mutate(Time_Series = basename(x))
      }
    ) |>
    dplyr::bind_rows() |>
    tibble::tibble()

  # is it short or longterm?
  if ("Month" %in% names(df)) {
    # if hour is missing, will be daily data - add it back here
    if (!"Hour" %in% names(df)) {
      df$Hour <- 0
    }
    # combine y/m/d/h into date
    df <-
      dplyr::mutate(
        df,
        date = lubridate::make_datetime(
          year = .data$Year,
          month = .data$Month,
          day = .data$Day,
          hour = .data$Hour,
          tz = "UTC"
        ),
        .after = "Pollutant"
      ) |>
      dplyr::select(
        -dplyr::any_of(c("Month", "Day", "Hour"))
      )
  }

  # if attributes given, read in and join
  if (!is.null(file_attributes)) {
    attributes <-
      data.table::fread(file_attributes, sep = delim) |>
      tibble::tibble()

    df <-
      dplyr::left_join(
        df,
        attributes,
        by = dplyr::join_by("Time_Series", "Year", "Pollutant", "Stat_Code"),
        suffix = c("", "__mqorextra"),
        relationship = "many-to-one"
      ) |>
      dplyr::select(
        -dplyr::ends_with("__mqorextra")
      ) |>
      dplyr::rename(
        type = "Measurement_Type"
      )
  }

  # rename some columns for convenience
  df <-
    df |>
    dplyr::rename(
      stat_code = "Stat_Code",
      param = "Pollutant",
      obs = "Observation",
      mod = "Model"
    ) |>
    dplyr::rename_with(tolower)

  # if there's a "year" column, make it into "date"
  if (!"date" %in% names(df)) {
    names(df)[names(df) == "year"] <- "date"
  } else {
    df$year <- NULL
  }

  # return data
  return(df)
}

#' Write Short- and Long-Term data in the recommended MQOR data structure
#'
#' This function takes a `data.frame` already in R and writes it to the `{mqor}`
#' flat file structure.
#'
#' @inheritParams data.table::fwrite
#'
#' @param data A `data.frame` to write to a file.
#'
#' @param delim The delimiter used to separate columns in the files. The default
#'   is `";"`.
#'
#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary
#'   to specify the columns in the data `{mqor}` should use.
#'
#' @author Jack Davison
#'
#' @export
write_mqor <- function(data, file = "", delim = ";", dict = mqor::mqo_dict()) {
  data <-
    dplyr::select(
      data,
      dplyr::all_of(c(
        dict$site,
        dict$pollutant,
        dict$date,
        dict$obs,
        dict$mod
      ))
    )

  names(data)[names(data) == dict$obs] <- "Observation"
  names(data)[names(data) == dict$mod] <- "Model"
  names(data)[names(data) == dict$site] <- "Stat_Code"
  names(data)[names(data) == dict$pollutant] <- "Pollutant"

  if (inherits(data[[dict$date]], c("POSIXct", "Date"))) {
    data <-
      dplyr::mutate(
        data,
        Year = lubridate::year(.data[[dict$date]]),
        Month = lubridate::month(.data[[dict$date]]),
        Day = lubridate::day(.data[[dict$date]]),
        Hour = lubridate::hour(.data[[dict$date]]),
        .after = "Pollutant",
        .keep = "unused"
      )

    if (all(data$Hour == 0)) {
      data$Hour <- NULL
    }
  } else if (is.numeric(data[[dict$date]])) {
    names(data)[names(data) == dict$date] <- "Year"
  }

  data.table::fwrite(data, file = file, sep = delim)
  return(invisible(data))
}

1		#' Automatic text formatting for mqor
2		#'
3		#' This function automatically formats character strings for use in plotting,
4		#' applying subscripts, superscripts and Greek letters as appropriate.
5		#'
6		#' @param text A character vector
7		#' @returns an expression for graphical evaluation
8		#' @author Adapted from `openair::quickText()`. Original function authored by
9		#' Karl Ropkins.
10		#'
11		#' @noRd
12		format_text <- function(text) {
13	4x	ans <- paste("expression(paste('", text, " ", sep = "")
14	4x	ans <- gsub("NO2", "' 'NO' [2] * '", ans)
15	4x	ans <- gsub("no2", "' 'NO' [2] * '", ans)
16	4x	ans <- gsub("\\bno\\b", "NO", ans)
17	4x	ans <- gsub("\\bNOX\\b", "' 'NO' [x] * '", ans)
18	4x	ans <- gsub("\\bnox\\b", "' 'NO' [x] * '", ans)
19	4x	ans <- gsub("\\bNOx\\b", "' 'NO' [x] * '", ans)
20	4x	ans <- gsub("NH3", "' 'NH' [3] * '", ans)
21	4x	ans <- gsub("nh3", "' 'NH' [3] * '", ans)
22	4x	ans <- gsub("co ", "' 'CO ' '", ans)
23	4x	ans <- gsub("co,", "' 'CO,' '", ans)
24	4x	ans <- gsub("nmhc", "' 'NMHC' '", ans)
25
26	4x	ans <- if (nchar(as.character(text)) == 2 && length(grep("ws", text)) > 0) {
27	!	gsub("ws", "' 'wind spd.' '", ans)
28		} else {
29	4x	ans
30		}
31	4x	ans <- gsub("wd", "' 'wind dir.' '", ans)
32	4x	ans <- gsub("rh ", "' 'relative humidity' '", ans)
33	4x	ans <- gsub("PM10", "' 'PM' [10] * '", ans)
34	4x	ans <- gsub("pm10", "' 'PM' [10] * '", ans)
35	4x	ans <- gsub("pm1", "' 'PM' [1] * '", ans)
36	4x	ans <- gsub("PM1", "' 'PM' [1] * '", ans)
37	4x	ans <- gsub("PM4", "' 'PM' [4] * '", ans)
38	4x	ans <- gsub("pm4", "' 'PM' [4] * '", ans)
39	4x	ans <- gsub("PMtot", "' 'PM' [total] * '", ans)
40	4x	ans <- gsub("pmtot", "' 'PM' [total] * '", ans)
41	4x	ans <- gsub("pmc", "' 'PM' [coarse] * '", ans)
42
43	4x	ans <- gsub("pmcoarse", "' 'PM' [coarse] * '", ans)
44	4x	ans <- gsub("PMc", "' 'PM' [coarse] * '", ans)
45	4x	ans <- gsub("PMcoarse", "' 'PM' [coarse] * '", ans)
46	4x	ans <- gsub("pmf", "' 'PM' [fine] * '", ans)
47	4x	ans <- gsub("pmfine", "' 'PM' [fine] * '", ans)
48	4x	ans <- gsub("PMf", "' 'PM' [fine] * '", ans)
49	4x	ans <- gsub("PMfine", "' 'PM' [fine] * '", ans)
50	4x	ans <- gsub("PM2.5", "' 'PM' [2.5] * '", ans)
51	4x	ans <- gsub("pm2.5", "' 'PM' [2.5] * '", ans)
52	4x	ans <- gsub("pm25", "' 'PM' [2.5] * '", ans)
53	4x	ans <- gsub("PM2.5", "' 'PM' [2.5] * '", ans)
54	4x	ans <- gsub("PM25", "' 'PM' [2.5] * '", ans)
55	4x	ans <- gsub("pm25", "' 'PM' [2.5] * '", ans)
56	4x	ans <- gsub("O3", "' 'O' [3] * '", ans)
57	4x	ans <- gsub("o3", "' 'O' [3] * '", ans)
58	4x	ans <- gsub("ozone", "' 'O' [3] * '", ans)
59	4x	ans <- gsub("CO2", "' 'CO' [2] * '", ans)
60	4x	ans <- gsub("co2", "' 'CO' [2] * '", ans)
61	4x	ans <- gsub("SO2", "' 'SO' [2] * '", ans)
62	4x	ans <- gsub("so2", "' 'SO' [2] * '", ans)
63	4x	ans <- gsub("H2S", "' 'H' [2] * 'S''", ans)
64	4x	ans <- gsub("h2s", "' 'H' [2] * 'S''", ans)
65	4x	ans <- gsub("CH4", "' 'CH' [4] * '", ans)
66	4x	ans <- gsub("ch4", "' 'CH' [4] * '", ans)
67	4x	ans <- gsub("ug/m3", "' * mu * 'g m' ^-3 *'", ans)
68	4x	ans <- gsub("ug.m-3", "' * mu * 'g m' ^-3 *'", ans)
69	4x	ans <- gsub("ug m-3", "' * mu * 'g m' ^-3 *'", ans)
70	4x	ans <- gsub("ugm-3", "' * mu * 'g m' ^-3 *'", ans)
71	4x	ans <- gsub("mg/m3", "' * 'm' * 'g m' ^-3 *'", ans)
72	4x	ans <- gsub("mg.m-3", "' * 'm' * 'g m' ^-3 *'", ans)
73	4x	ans <- gsub("mg m-3", "' * 'm' * 'g m' ^-3 *'", ans)
74	4x	ans <- gsub("mgm-3", "' * 'm' * 'g m' ^-3 *'", ans)
75	4x	ans <- gsub("ng/m3", "' * 'n' * 'g m' ^-3 *'", ans)
76	4x	ans <- gsub("ng.m-3", "' * 'n' * 'g m' ^-3 *'", ans)
77	4x	ans <- gsub("ng m-3", "' * 'n' * 'g m' ^-3 *'", ans)
78	4x	ans <- gsub("ngm-3", "' * 'n' * 'g m' ^-3 *'", ans)
79	4x	ans <- gsub("m/s2", "' 'm s' ^-2 *'", ans)
80	4x	ans <- gsub("m/s", "' 'm s' ^-1 *'", ans)
81	4x	ans <- gsub("m.s-1", "' 'm s' ^-1 *'", ans)
82	4x	ans <- gsub("m s-1", "' 'm s' ^-1 *'", ans)
83	4x	ans <- gsub("km2", "' 'km' ^2 *'", ans)
84	4x	ans <- gsub("g/km", "' 'g km' ^-1 *'", ans)
85	4x	ans <- gsub("g/s", "' 'g s' ^-1 *'", ans)
86	4x	ans <- gsub("kW/t", "' 'kW t' ^-1 *'", ans)
87	4x	ans <- gsub("g/hour", "' 'g hour' ^-1 *'", ans)
88	4x	ans <- gsub("g/hr", "' 'g hour' ^-1 *'", ans)
89	4x	ans <- gsub("g/m3", "' 'g m' ^-3 *'", ans)
90	4x	ans <- gsub("g/kg", "' 'g kg' ^-1 *'", ans)
91	4x	ans <- gsub("cm-3", "' 'cm' ^-3 *'", ans)
92	4x	ans <- gsub("km/hr/s", "' 'km hr' ^-1 * ' s' ^-1 *'", ans)
93	4x	ans <- gsub("km/hour/s", "' 'km hr' ^-1 * ' s' ^-1 *'", ans)
94	4x	ans <- gsub("km/h/s", "km hr' ^-1 * ' s' ^-1 *'", ans)
95	4x	ans <- gsub("km/hr", "' 'km hr' ^-1 *'", ans)
96	4x	ans <- gsub("km/h", "' 'km hr' ^-1 *'", ans)
97	4x	ans <- gsub("km/hour", "' 'km hr' ^-1 *'", ans)
98
99	4x	ans <- paste(ans, "'))", sep = "")
100
101	4x	if (substr(ans, (nchar(ans) - 8), (nchar(ans) - 6)) == "] *") {
102	!	a <- ans
103	!	ans <- paste(
104	!	substr(a, 1, (nchar(a) - 7)),
105	!	substr(a, (nchar(a) - 5), nchar(a)),
106	!	sep = ""
107		)
108		}
109
110	4x	ans <- gsub("''", "", ans)
111	4x	ans <- gsub("' '", "", ans)
112	4x	ans <- gsub("\\* \\*", "~", ans)
113	4x	ans <- gsub("^expression\\(paste\\( \\*", "expression(paste(", ans)
114	4x	ans <- gsub("^expression\\(paste\\(\\*", "expression(paste(", ans)
115
116	4x	if (substr(ans, (nchar(ans) - 2), (nchar(ans) - 2)) == "*") {
117	!	a <- ans
118	!	ans <- paste(
119	!	substr(a, 1, (nchar(a) - 2)),
120		" ' ' ",
121	!	substr(a, (nchar(a) - 1), nchar(a)),
122	!	sep = ""
123		)
124		}
125
126	4x	if (inherits(try(eval(parse(text = ans)), TRUE), "try-error") == FALSE) {
127	4x	ans <- eval(parse(text = ans))
128		} else {
129	!	ans <- text
130		}
131
132	4x	return(ans)
133		}

#' Function to read CDF files in the DELTA format
#'
#' This function uses [ncdf4][ncdf4::ncdf4-package] to read a CDF file of
#' short-term air quality data into R as a `data.frame` for further analysis in
#' an `{mqor}` analysis pipeline. Alternatively, users can define a directory of
#' delimited text files with [read_delta_data_delim()].
#'
#' @param file A path to a .cdf file, formatted as requested by the DELTA tool.
#'
#' @param data_type One of `"obs"` or `"mod"`; used to correctly label the data
#'   as being observations or modelled data for later analysis.
#'
#' @return a [tibble][tibble::tibble-package]
#'
#' @family delta reading functions
#'
#' @export
read_delta_data_cdf <- function(file, data_type) {
  rlang::check_installed("ncdf4")

  data_type <- rlang::arg_match(data_type, c("obs", "mod"))

  nc <- ncdf4::nc_open(file)

  model <- gsub(".cdf", "", basename(file))

  on.exit(ncdf4::nc_close(nc))

  params <-
    ncdf4::ncatt_get(nc, varid = 0)$Parameters |>
    as.raw() |>
    rawToChar() |>
    strsplit(" ") |>
    (\(x) x[[1]])()

  vars <- names(nc$var)

  year <- ncdf4::ncatt_get(nc, 0)$Year
  start <- ncdf4::ncatt_get(nc, 0)$StartHour + 1L
  end <- ncdf4::ncatt_get(nc, 0)$EndHour + 1L

  dates <-
    seq(
      ISOdate(
        year = year,
        month = 1,
        day = 1,
        hour = 0,
        tz = "GMT"
      ),
      ISOdate(
        year = year,
        month = 12,
        day = 31,
        hour = 23,
        tz = "GMT"
      ),
      by = "hour"
    )[start:end]

  data <-
    purrr::map(
      vars,
      function(x) {
        array <- ncdf4::ncvar_get(nc, x)
        array |>
          t() |>
          as.data.frame() |>
          stats::setNames(params) |>
          dplyr::tibble() |>
          dplyr::mutate(site = x, date = dates, .before = 0L)
      },
      .progress = TRUE
    ) |>
    dplyr::bind_rows() |>
    tidyr::pivot_longer(
      -c("site", "date"),
      names_to = "param",
      values_to = data_type
    )

  # drop invalid values
  data[data_type][data[data_type] == -999] <- NA

  if (data_type == "mod") {
    # if modelled data, add a 'model' column
    data <- dplyr::mutate(data, model = model, .before = dplyr::everything())
  }

  return(data)
}

#' Function to read CSV files in the DELTA format
#'
#' This function uses [data.table::fread()] to read a directory of tabular files
#' of short-term air quality data into R as a `data.frame` for further analysis
#' in an `{mqor}` analysis pipeline. Alternatively, users can read a single
#' netCDF file with [read_delta_data_cdf()].
#'
#' @param path A path to a directory of delimited files, formatted as requested
#'   by the DELTA tool.
#' @param data_type One of `"obs"` or `"mod"`; used to correctly label the data
#'   as being observations or modelled data for later analysis.
#' @param model_name If `data_type = "mod"`, this value will be used to assign a
#'   model name to the data.
#' @param delim The delimited used to separate columns in the input files. The
#'   default, `;`, is prescribed by the DELTA tool.
#' @param pattern Passed to [dir()] to identify relevant files in `path`. Also
#'   used to clean the file names to attach site names to the data frames.
#'
#' @return a [tibble][tibble::tibble-package]
#'
#' @family delta reading functions
#'
#' @export
read_delta_data_delim <- function(
  path,
  data_type,
  model_name = "MODEL",
  delim = ";",
  pattern = ".csv"
) {
  data_type <- rlang::arg_match(data_type, c("obs", "mod"))

  data <-
    purrr::map2(
      .x = dir(path, full.names = TRUE, pattern = pattern),
      .y = dir(path, full.names = FALSE, pattern = pattern),
      .f = function(x, y) {
        data.table::fread(x, sep = delim, na.strings = "-999") |>
          dplyr::tibble() |>
          dplyr::mutate(site = gsub(pattern, "", y), .before = 0L)
      },
      .progress = TRUE
    ) |>
    dplyr::bind_rows() |>
    dplyr::mutate(
      date = ISOdate(
        year = .data$year,
        month = .data$month,
        day = .data$day,
        hour = .data$hour,
        tz = "GMT"
      ),
      .keep = "unused",
      .after = "site"
    )

  for (i in names(data)) {
    if (all(is.na(data[[i]]))) {
      data[i] <- NULL
    }
  }

  data <-
    tidyr::pivot_longer(
      data,
      -c("site", "date"),
      names_to = "param",
      values_to = data_type
    )

  if (data_type == "mod") {
    # if modelled data, add a 'model' column
    data <- dplyr::mutate(
      data,
      model = model_name,
      .before = dplyr::everything()
    )
  }

  return(data)
}

#' Function to read yearly CSV files in the DELTA format
#'
#' This function reads yearly data formatted as required by the DELTA tool,
#' structured in a single file. Monitoring data structured in multiple files can
#' alternatively be imported using [read_delta_yearly_dir()].
#'
#' @param file The path to a single CSV file containing yearly data, formatted
#'   as requested by the DELTA tool.
#'
#' @inheritParams read_delta_data_delim
#'
#' @return a [tibble][tibble::tibble-package]
#'
#' @family delta reading functions
#'
#' @export
read_delta_yearly_file <- function(
  file,
  data_type,
  model_name = "MODEL",
  delim = ";"
) {
  data_type <- rlang::arg_match(data_type, c("obs", "mod"))

  lines <- readLines(file)
  lines <- lines[!startsWith(lines, delim)]

  colnames <- strsplit(lines[1], ";")[[1]]
  data <-
    utils::read.table(
      text = lines[-1],
      sep = delim,
      col.names = c("site", colnames[-(1:2)])
    ) |>
    dplyr::mutate(date = as.integer(colnames[2])) |>
    tidyr::pivot_longer(
      -c("site", "date"),
      names_to = "param",
      values_to = data_type
    )

  if (data_type == "mod") {
    # if modelled data, add a 'model' column
    data <- dplyr::mutate(
      data,
      model = model_name,
      .before = dplyr::everything()
    )
  }

  data[[data_type]][data[[data_type]] == -999] <- NA

  return(data)
}

#' Function to read a directory of yearly CSV files in the DELTA format
#'
#' This function reads yearly data formatted as required by the DELTA tool,
#' structured across multiple files. This format is only supported for
#' monitoring data. Data structured in a single file can
#' alternatively be imported using [read_delta_yearly_file()].
#'
#' @inheritParams read_delta_data_delim
#'
#' @return a [tibble][tibble::tibble-package]
#'
#' @family delta reading functions
#'
#' @export
read_delta_yearly_dir <- function(
  path,
  delim = ";",
  pattern = ".csv"
) {
  files <- dir(path, full.names = TRUE, pattern = pattern)

  read_file <- function(file) {
    lines <- readLines(file)
    lines <- lines[!startsWith(lines, delim)]

    colnames <- strsplit(lines[1], ";")[[1]]

    utils::read.table(
      text = lines[2],
      sep = ";",
      col.names = colnames[-(1:2)]
    ) |>
      dplyr::mutate(
        date = as.integer(colnames[2]),
        site = gsub(pattern, "", basename(file))
      ) |>
      tidyr::pivot_longer(
        -c("site", "date"),
        names_to = "param",
        values_to = "obs"
      ) |>
      dplyr::mutate(obs = ifelse(.data$obs == -999, NA, .data$obs))
  }

  purrr::map(files, read_file) |>
    dplyr::bind_rows()
}

#' Function to read DELTA tool resource files
#'
#' These functions read selected files from the DELTA tool's `resource`
#' directory. The most useful file for `{mqor}` is likely the `startup` file as
#' this contains vital monitoring station metadata, but this function also
#' supports the `MyDeltaInput.ini`, `update.ini` and `init.ini` files for
#' completeness.
#'
#' @param file A path to an appropriate DELTA tool resource file; one of
#'   `init.ini`, `MyDeltaInput.dat`, `startup_*.ini`, or `update.ini`.
#'
#' @return a list
#'
#' @family delta reading functions
#'
#' @export
read_delta_resource <- function(file) {
  if (grepl("startup", file)) {
    lines <- readLines(file)

    id <- which(lines %in% c("[MODEL]", "[PARAMETERS]", "[MONITORING]"))

    model_lines <- lines[(id[1] + 1L):(id[2] - 1L)]
    model_lines <- model_lines[!grepl(";", model_lines)]

    year <- as.integer(model_lines[1])
    frequency <- model_lines[2]
    scale <- model_lines[3]

    param_lines <- lines[(id[2] + 1L):(id[3] - 1L)]
    param_lines <- param_lines[!startsWith(param_lines, ";")]
    params <- data.table::fread(text = c("param;type;unit", param_lines)) |>
      dplyr::tibble()

    monitoring_lines <- lines[(id[3] + 1L):length(lines)]
    monitoring_lines[!endsWith(monitoring_lines, ";")] <- paste0(
      monitoring_lines[!endsWith(monitoring_lines, ";")],
      ";"
    )

    monitoring <-
      data.table::fread(text = monitoring_lines) |>
      dplyr::tibble() |>
      dplyr::select(-dplyr::any_of("V13")) |>
      dplyr::rename_with(.fn = function(x) gsub(" ", "_", tolower(x))) |>
      dplyr::mutate(
        listofvariables = strsplit(.data$listofvariables, "\\*")
      )

    out <- list(
      year = year,
      frequency = frequency,
      scale = scale,
      params = params,
      monitoring = monitoring
    )
  } else if (grepl("update.ini", file) || grepl("init.ini", file)) {
    x <- readLines(file)
    x <- x[
      !startsWith(x, ";") & !startsWith(x, "#") & !startsWith(x, "[") & x != ""
    ]
    out <-
      strsplit(x, "=") |>
      purrr::map(.f = \(x) list(x[[2]]) |> stats::setNames(x[[1]])) |>
      purrr::flatten()
  } else if (grepl("MyDeltaInput", file)) {
    x <- readLines(file)
    x <- x[
      !startsWith(x, ";") & !startsWith(x, "#") & !startsWith(x, "[") & x != ""
    ]
    x <- gsub("\\", "/", x, fixed = TRUE)
    out <- as.list(stats::setNames(x, c("startup", "modeling", "monitoring")))
  } else {
    cli::cli_abort(
      "File not recognised; please provide one of the 'init.ini', 'MyDeltaInput.dat', 'startup_*.ini' or 'update.ini' files found in the DELTA 'resource' directory."
    )
  }

  out$file <- file

  return(out)
}

#' Convert the 'observation' and 'modelled' columns of a `mqor`-ready dataset
#' into rolling means
#'
#' This function takes a dataset prepared for `mqo` calculations and replaces
#' the `observation` and `modelled` data columns with a rolling equivalent.
#' Rolling values are calculated *within* groups of any categorical columns
#' present in the data (likely the "pollutant" and "site" columns, along with
#' any metadata).
#'
#' @inheritParams summarise_mqo_stats
#'
#' @param window_size Integer vector giving rolling window size(s). This is the
#'   total number of included values. Defaults to `8L`, which is the most useful
#'   for ozone statistics.
#'
#' @param min_coverage The minimum data coverage percent, expressed as a decimal
#'   (i.e., this option should be between `0` and `1`, representing 0% and 100%).
#'
#' @param progress Show a progress bar? Passed to [purrr::map()].
#'
#' @seealso `openair::rollingMean()` for a more flexible and performant version
#'   of `mutate_rolling_mean()`.
#'
#' @family data utilities
#'
#' @export
mutate_rolling_mean <- function(
  data,
  window_size = 8L,
  min_coverage = 0.75,
  progress = TRUE,
  dict = mqor::mqo_dict()
) {
  validate_coverage(min_coverage)
  data <- dplyr::arrange(data, .data[[dict$date]])

  cat_columns <-
    data |>
    dplyr::select(dplyr::where(is.character) | dplyr::where(is.factor)) |>
    names()

  purrr::map(
    split(data, data[c(cat_columns)], drop = TRUE),
    \(x) {
      x[[dict$obs]] <- data.table::frollmean(
        x = x[[dict$obs]],
        n = window_size,
        align = "right"
      )

      x[[dict$mod]] <- data.table::frollmean(
        x = x[[dict$mod]],
        n = window_size,
        align = "right"
      )

      flag_obs <- !is.na(x[[dict$obs]])
      obs_dc <- data.table::frollmean(
        x = flag_obs,
        n = window_size,
        align = "right"
      )

      flag_mod <- !is.na(x[[dict$mod]])
      mod_dc <- data.table::frollmean(
        x = flag_mod,
        n = window_size,
        align = "right"
      )

      x[[dict$obs]] <- dplyr::if_else(obs_dc <= min_coverage, NA, x[[dict$obs]])
      x[[dict$mod]] <- dplyr::if_else(mod_dc <= min_coverage, NA, x[[dict$mod]])

      return(x)
    },
    .progress = progress
  ) |>
    dplyr::bind_rows()
}

#' Calculate daily summaries of a `mqor`-ready dataset
#'
#' These functions take a dataset prepared for `mqo` calculations and calculates
#' a daily or annual mean, maximum or minimum value. Averages are calculated
#' *within* groups of any categorical columns present in the data (likely the
#' "pollutant" and "site" columns, along with any metadata).
#'
#' @inheritParams summarise_mqo_stats
#'
#' @param statistic One of `"mean"`, `"min"`, or `"max"`, representing the
#'   average, minimum or maximum
#'
#' @param min_coverage The minimum data coverage percent, expressed as a decimal
#'   (i.e., this option should be between `0` and `1`, representing 0% and
#'   100%). Note that calculations of data coverage can only be calculated for
#'   the data presented; the user must provide a complete timeseries for an
#'   accurate coverage calculation.
#'
#' @seealso `openair::timeAverage()` for a more flexible and performant version
#'   of `summarise_daily()`.
#'
#' @family data utilities
#'
#' @rdname summarise-temporal
#' @order 1
#' @export
summarise_daily <- function(
  data,
  statistic = "mean",
  min_coverage = 0.75,
  dict = mqor::mqo_dict()
) {
  summarise_temporal(
    data = data,
    statistic = statistic,
    min_coverage = min_coverage,
    dict = dict,
    floor = "day"
  )
}

#' @rdname summarise-temporal
#' @order 2
#' @export
summarise_annual <- function(
  data,
  statistic = "mean",
  min_coverage = 0.75,
  dict = mqor::mqo_dict()
) {
  data <-
    summarise_temporal(
      data = data,
      statistic = statistic,
      min_coverage = min_coverage,
      dict = dict,
      floor = "year"
    )

  data[[dict$date]] <- lubridate::year(data[[dict$date]])

  return(data)
}

#' @noRd
summarise_temporal <- function(
  data,
  statistic,
  min_coverage,
  dict,
  floor = c("day", "year")
) {
  validate_coverage(min_coverage)
  fun <- get_fun(statistic)

  # get categorical columns to summarise by
  cat_columns <-
    data |>
    dplyr::select(dplyr::where(is.character) | dplyr::where(is.factor)) |>
    names()

  # summarise data
  out <-
    data |>
    dplyr::mutate(date = lubridate::floor_date(.data[[dict$date]], floor)) |>
    dplyr::summarise(
      obs = fun(.data[[dict$obs]], min_coverage),
      mod = fun(.data[[dict$mod]], min_coverage),
      .by = dplyr::all_of(c("date", cat_columns))
    )

  # restore original modelled/observed names
  names(out)[names(out) == "mod"] <- dict$mod
  names(out)[names(out) == "obs"] <- dict$obs
  names(out)[names(out) == "date"] <- dict$date

  # return data
  return(out)
}


#' @noRd
get_fun <- function(statistic) {
  if (statistic == "mean") {
    fun <- safe_mean
  } else if (statistic == "max") {
    fun <- safe_max
  } else if (statistic == "min") {
    fun <- safe_min
  }
}

#' @noRd
safe_mean <- function(x, cov) {
  if (all(is.na(x)) | mean(!is.na(x)) < cov) {
    NA
  } else {
    mean(x, na.rm = TRUE)
  }
}

#' @noRd
safe_max <- function(x, cov) {
  if (all(is.na(x)) | mean(!is.na(x)) < cov) {
    NA
  } else {
    max(x, na.rm = TRUE)
  }
}

#' @noRd
safe_min <- function(x, cov) {
  if (all(is.na(x)) | mean(!is.na(x)) < cov) {
    NA
  } else {
    max(x, na.rm = TRUE)
  }
}

#' @noRd
validate_coverage <- function(min_coverage) {
  if (min_coverage < 0 || min_coverage > 1 || !is.numeric(min_coverage)) {
    cli::cli_abort(
      "{.field min_coverage} must be a number between {0L} and {1L}."
    )
  }
}

#' Validate that all observations have a matching modelled value, and vice-versa
#'
#' When calculating statistics, it is important that the methodology is applied
#' to observation-model pairs. This function can help identify and fix this situation.
#'
#' @param data A `data.frame` with numeric columns `dict$obs` and `dict$mod`.
#'
#' @param mode One of `"fix"` (the default), `"error"` or `"warn"`. If `"fix"`
#'   the function will return a dataframe with any mismatched values set to
#'   `NA`. If `"error"` or `"warn"`, the the function will either error or warn
#'   if mismatched values are detected.
#'
#' @inheritParams summarise_mqo_stats
#'
#' @author Jack Davison
#' @family data utilities
#' @export
validate_mod_obs_pairs <- function(
  data,
  mode = c("fix", "error", "warn"),
  dict = mqor::mqo_dict()
) {
  mode <- rlang::arg_match(mode)

  if (mode == "fix") {
    data <-
      dplyr::mutate(
        data,
        `__new_obs__` = dplyr::if_else(
          is.na(.data[[dict$mod]]),
          NA,
          .data[[dict$obs]]
        ),
        `__new_mod__` = dplyr::if_else(
          is.na(.data[[dict$obs]]),
          NA,
          .data[[dict$mod]]
        )
      )

    data[[dict$obs]] <- data[["__new_obs__"]]
    data[[dict$mod]] <- data[["__new_mod__"]]
    data[["__new_mod__"]] <- data[["__new_obs__"]] <- NULL
  }

  if (mode %in% c("error", "warn")) {
    if (mode == "error") {
      fun <- cli::cli_abort
    } else {
      fun <- cli::cli_warn
    }
    id <- is.na(data[[dict$mod]]) != is.na(data[[dict$obs]])

    if (any(id)) {
      fun(
        "Some modelled values do not have matching observations, or vice-versa."
      )
    }
  }

  return(data)
}

#' Calculate a ranked percentile of a numeric vector
#'
#' This function provides a convenient method to calculate a percentile value
#' from a numeric vector. The vector, `x`, is ranked in ascending order and the
#' *n*th percentile value is inferred by linear interpolation.
#'
#' @param x A numeric vector from which to calculate a percentile value.
#'
#' @param quantile A quantile value between `0` and `1`. The default, `0.9`,
#'   calculates the 90th percentile.
#'
#' @inheritParams stats::quantile
#'
#' @family data utilities
#'
#' @author Jack Davison
#'
#' @examples
#' mqis <- seq(0.1, 1, by = 0.1)
#'
#' mqo_percentile(mqis)
#'
#' mqo_percentile(mqis[1:9])
#'
#' @export
mqo_percentile <- function(x, quantile = 0.9, na.rm = FALSE) {
  if (!na.rm & anyNA(x)) {
    cli::cli_abort(
      "Missing values in {.field x} not allowed if {.field na.rm} is {FALSE}."
    )
  }

  if (!dplyr::between(quantile, 0, 1)) {
    cli::cli_abort("{.field quantile} must be between {0} and {1}.")
  }

  x <- x[!is.na(x)]

  x <- sort(x)

  n <- floor(length(x) * quantile)

  d <- (length(x) * quantile) - n

  return(x[n] + (x[n + 1] - x[n]) * d)
}

#' @noRd
determine_term <- function(data, term, dict) {
  if (is.null(term)) {
    sites <- data[[dict$site]]
    x <- as.vector(table(sites))
    if (any(x > 1)) {
      term <- "short"
    } else {
      term <- "long"
    }
    cli::cli_inform(c(
      "!" = "{.field term} assumed to be '{term}'.",
      "i" = "If this is incorrect, please specify the data's term using the {.field term} argument."
    ))
  } else {
    term <- validate_term(term)
  }

  return(term)
}

#' @noRd
determine_resolution <- function(data, term, dict) {
  if (term == "long") {
    return("annual")
  }

  counts <-
    purrr::map(
      split(data, data[[dict$site]]),
      function(x) {
        difftime(x[[dict$date]], dplyr::lag(x[[dict$date]]), units = "secs")
      }
    ) |>
    purrr::list_c() |>
    table()

  max_splits <- as.numeric(names(sort(counts)[1]))

  if (max_splits == 86400) {
    return("daily")
  } else if (max_splits == 3600) {
    return("hourly")
  } else {
    cli::cli_abort(
      "Cannot determine time resolution of data, please use {.fun mqor::mqo_params} or {.fun mqor::mqo_params_default} directly."
    )
  }
}

#' Function to read DELTA tool configuration files
#'
#' These functions read selected files from the DELTA tool's `configuration`
#' directory. These are likely not overly useful for `{mqor}` as it currently
#' stands, but may be useful in future if further features of the DELTA toolkit
#' are incorporated into `{mqor}` or similar packages.
#'
#' @param file A path to an appropriate DELTA tool configuration file, e.g.,
#'   `axis_type.dat`.
#'
#' @return a list
#'
#' @family delta reading functions
#'
#' @export
read_delta_config <- function(file) {
  if (grepl("aqibounds", file)) {
    read_aqibounds(file)
  } else {
    read_config(file)
  }
}

#' Read any config file
#' @noRd
read_config <- function(file) {
  # read raw data - drop anything beginning with ; or #
  x <- readLines(file)
  x <- x[!startsWith(x, ";")]
  x <- x[!startsWith(x, "#")]

  # get first row and format as vector of headers
  header <- x[1]
  header <- gsub("[", "", header, fixed = TRUE)
  header <- gsub("]", "", header, fixed = TRUE)
  header <- strsplit(header, ": ")[[1]][2]

  # some are delimited by commas, some by ;
  delim <- ","
  if (!grepl(",", header)) {
    delim <- ";"
  }

  # split headers into vector, ignoring delimters within parentheses
  split_outside_parentheses <- function(text) {
    # Pattern: match a comma not inside parentheses
    pattern <- paste0(delim, "(?![^()]*\\))")

    # Split the string using the pattern
    result <- strsplit(text, pattern, perl = TRUE)[[1]]

    # Trim whitespace
    result <- trimws(result)

    return(result)
  }

  # tidy up names
  header <- split_outside_parentheses(header)
  header <- header[header != "" & header != "\""]
  header <- gsub("\"", "", header, fixed = TRUE)

  # read in data
  if (length(x[-1]) != 0) {
    out <- utils::read.delim(text = x[-1], sep = ";", header = F)
    out <- stats::setNames(out, header[1:ncol(out)])

    for (i in names(out)) {
      if (all(is.na(out[[i]]))) {
        out[[i]] <- NULL
      }
    }
  } else {
    # if empty, return empty dataframe
    out <- data.frame()
    for (i in header) {
      out <- cbind(out, data.frame(character(0)) |> stats::setNames(i))
    }
  }

  out$file <- file

  return(tibble::tibble(out))
}

#' Special handling for the AQIBounds
#' @noRd
read_aqibounds <- function(file) {
  x <- readLines(file)
  x <- x[!startsWith(x, ";") & !startsWith(x, "#")]
  x <- x[x != ""]
  aqi_tables <-
    tibble::enframe(x, name = NULL) |>
    dplyr::mutate(id = ifelse(grepl("ID", .data$value), .data$value, NA)) |>
    tidyr::fill("id") |>
    dplyr::distinct() |>
    split(~id, drop = TRUE)

  read_aqi_tbl <- function(aqi) {
    aqi <- aqi$value
    aqi <- aqi[aqi != "END"]
    name <- trimws(strsplit(aqi[1], "=")[[1]][2])
    ncat <- trimws(strsplit(aqi[2], "=")[[1]][2])
    cat_names <- strsplit(trimws(strsplit(aqi[8], "=")[[1]][2]), ",")[[1]] |>
      trimws()

    n <- which(startsWith(aqi, "Cat_Names")) - 1

    lims <-
      strsplit(trimws(aqi[3:n]), "=") |>
      purrr::map(trimws)

    purrr::map(
      lims,
      function(x) {
        data.frame(
          value = strsplit(x[[2]], ", ")[[1]] |> trimws() |> as.numeric()
        ) |>
          stats::setNames(x[[1]])
      }
    ) |>
      dplyr::bind_cols() |>
      dplyr::slice_tail(n = -1) |>
      dplyr::mutate(
        aqi = name,
        category = rev(cat_names),
        .before = dplyr::everything()
      )
  }

  purrr::map(aqi_tables, read_aqi_tbl) |>
    dplyr::bind_rows() |>
    dplyr::tibble() |>
    tidyr::pivot_longer(
      cols = -c("aqi", "category"),
      values_to = "value",
      names_to = "param"
    )
}

#' Plot a bar chart visualising the Modelling Quality Indicator (MQI)
#'
#' This function produces an ordered bar chart visualising the modelling quality
#' indicator (MQI) for each `site` in the dataset. It also draws the 90th
#' percentile of MQIs as a horizontal line; if this is below unity (`1`) the
#' Modelling Quality Objective has been met. Unlike [plot_comparison_bars()],
#' both long-term and short-term MQIs/MQOs are visualised identically - although
#' input `params` and calculated `mqi` and `mqo` values will differ.
#'
#' @inheritParams plot_comparison_bars
#'
#' @param color_bars,color_outline The colours to use to fill the bars, and for
#'   the outlines/horizontal lines. Can be expressed as hex codes, or any
#'   colours listed in [colors()].
#'
#' @family plotting functions
#'
#' @author Jack Davison
#'
#' @examples
#' plot_mqi_bars(
#'   summarise_mqo_stats(demo_longterm, pollutant = "PM10")
#' )
#'
#' plot_mqi_bars(
#'   summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
#' )
#'
#' @export
plot_mqi_bars <- function(
  stats,
  color_bars = "grey85",
  color_outline = "black",
  interactive = FALSE
) {
  validate_mqor_stats(stats)
  term <- stats$inputs$term

  stats$by_site$site <- forcats::fct_reorder(
    stats$by_site$site,
    stats$by_site$mqi
  )

  plotdata <- stats$by_site

  mqi90 <- stats$summary$mqi_90

  if (interactive) {
    rlang::check_installed("plotly")

    plt <-
      plotly::plot_ly() |>
      plotly::add_trace(
        x = plotdata$site,
        y = plotdata$mqi,
        type = "bar",
        color = I(color_bars),
        marker = list(line = list(color = color_outline, width = 1))
      ) |>
      plotly::add_annotations(
        y = mqi90 + 0.02,
        xref = "paper",
        x = 0.025,
        text = as.character(round(mqi90, 2)),
        showarrow = FALSE
      ) |>
      plotly::add_annotations(
        y = 1.02,
        xref = "paper",
        x = 0.025,
        text = "1.00",
        showarrow = FALSE
      ) |>
      plotly::layout(
        shapes = list(
          hline(1, color = color_outline, dash = "solid"),
          hline(mqi90, color = color_outline, dash = "dot")
        ),
        xaxis = list(
          title = "Sampling Points"
        ),
        yaxis = list(
          title = paste0("MQI<sub>", term, "</sub>"),
          fixedrange = TRUE
        )
      )
  } else {
    plt <-
      ggplot2::ggplot(
        plotdata,
        ggplot2::aes(x = .data$site, y = .data$mqi)
      ) +
      ggplot2::geom_col(
        width = (2 / 3),
        fill = color_bars,
        color = color_outline
      ) +
      ggplot2::geom_hline(yintercept = 1L, lty = 1, color = color_outline) +
      ggplot2::geom_hline(yintercept = mqi90, lty = 2, color = color_outline) +
      ggplot2::theme_bw(14) +
      ggplot2::scale_y_continuous(
        expand = ggplot2::expansion(c(0, .1)),
        sec.axis = ggplot2::sec_axis(~., breaks = c(1, round(mqi90, 2)))
      ) +
      ggplot2::labs(
        x = "Sampling Points",
        y = if (term == "short") {
          expression(MQI[short])
        } else if (term == "long") {
          expression(MQI[long])
        }
      )
  }

  return(plt)
}

#' Helper
#' @noRd
hline <- function(y = 0, color = "black", dash = "dash") {
  list(
    type = "line",
    x0 = 0,
    x1 = 1,
    xref = "paper",
    y0 = y,
    y1 = y,
    line = list(color = color, dash = dash)
  )
}

#' @method print mqor_stats
#' @export
print.mqor_stats <- function(x, ...) {
  cli::cli_h1("{.pkg mqor} Statistics Set")
  if (x$summary$mqo) {
    cli::cli_alert_success("{.field MQO} Passed")
  } else {
    cli::cli_alert_danger("{.field MQO} Failed")
  }
  cli::cli_dl(
    c(
      Pollutant = x$inputs$pollutant,
      Term = x$inputs$term
    )
  )

  cli::cli_h2("Objects")
  cli::cli_ul(id = "mqorlist")
  for (i in names(x)) {
    if (is.data.frame(x[[i]])) {
      cli::cli_li("x{.field ${i}} ({nrow(x[[i]])} row{?s})")
    } else {
      cli::cli_li("x{.field ${i}}")
    }
  }
  cli::cli_end(id = "mqorlist")
}

#' @method summary mqor_stats
#' @export
summary.mqor_stats <- function(object, ...) {
  object$summary
}

#' @method plot mqor_stats
#' @export
plot.mqor_stats <- function(x, ...) {
  if (x$inputs$term == "long") {
    plot_mqi_scatter(stats_longterm = x, ...)
  } else if (x$inputs$term == "short") {
    plot_mqi_scatter(stats_shortterm = x, ...)
  }
}

#' @noRd
is.mqor_stats <- function(x) {
  inherits(x, "mqor_stats")
}

#' @noRd
validate_mqor_stats <- function(stats) {
  if (!is.null(stats) && !is.mqor_stats(stats)) {
    cli::cli_abort(
      c(
        "!" = "This function expects an {.pkg mqor} statistics set generated by {.fun mqor::summarise_mqo_stats}.",
        "i" = "You have provided {.type {stats}}."
      ),
      call = NULL
    )
  }
}

#' Plot a 'report' summarising indicators for both short- and long-term model
#' quality objectives
#'
#' This function produces a series of one-dimensional scatter plots reporting
#' all of the spatial and temporal indicators, as well as the MQIs, contained
#' within short- and long-term statistics objects. In non-interactive plots, a
#' dotted vertical line indicates the 90th percentile (for MQIs & TIs) or
#' maximum (for SIs) values for each statistic.
#'
#' @inheritParams plot_mqi_scatter
#'
#' @param color_target The colour to use for highlighting the target range for
#'   each statistic (always `0` to `1`). This colour will recieve an alpha value
#'   of `0.15` to ensure the scatter markers are still visible against it.
#'
#' @param censor The maximum value to show in the 'report'. Any values above
#'   this will be reduced to this value. The default, `2`, is twice the size of
#'   the objective of `1` and is appropriate for most use cases.
#'
#' @family plotting functions
#'
#' @author Jack Davison
#'
#' @returns a [patchwork][patchwork::patchwork-package] assemblage
#'
#' @examples
#' long <- summarise_mqo_stats(demo_longterm, pollutant = "PM10")
#'
#' short <- summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
#'
#' plot_mqi_report(short, long)
#'
#' @export
plot_mqi_report <- function(
  stats_shortterm = NULL,
  stats_longterm = NULL,
  color_fixed = "#4269D0",
  color_indicative = "#EFB118",
  color_outline = "black",
  color_target = "green4",
  censor = 2,
  interactive = FALSE
) {
  validate_mqor_stats(stats_shortterm)
  validate_mqor_stats(stats_longterm)

  if (!interactive) {
    plot_pi_bar <- function(data, x, xintercept, ylab) {
      data[[x]][data[[x]] > censor] <- censor

      ggplot2::ggplot(data, ggplot2::aes(x = abs(.data[[x]]), y = "")) +
        ggplot2::annotate(
          geom = "rect",
          ymax = Inf,
          ymin = -Inf,
          xmin = -Inf,
          xmax = 1,
          alpha = 0.15,
          fill = color_target
        ) +
        ggplot2::geom_vline(
          xintercept = min(c(censor, xintercept)),
          lty = 3,
          color = color_outline
        ) +
        ggplot2::geom_vline(
          xintercept = censor,
          lty = 2,
          color = color_outline
        ) +
        ggplot2::geom_jitter(width = 0, ggplot2::aes(color = .data$type)) +
        ggplot2::scale_x_continuous(
          limits = c(0, censor),
          sec.axis = ggplot2::sec_axis(
            ~.,
            breaks = xintercept,
            labels = signif(xintercept, 2)
          )
        ) +
        ggplot2::scale_color_manual(
          values = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          ),
          aesthetics = c("fill", "colour")
        ) +
        ggplot2::labs(x = NULL, y = ylab, color = NULL) +
        ggplot2::theme_bw(14) +
        ggplot2::theme(
          axis.title.y = ggplot2::element_text(
            angle = 0,
            vjust = 0.5,
            hjust = 1
          )
        ) +
        ggplot2::coord_cartesian(clip = "off")
    }

    if (!is.null(stats_shortterm)) {
      plots_short <- list(
        plot_pi_bar(
          stats_shortterm$by_site,
          "mqi",
          stats_shortterm$summary$mqi_90,
          bquote(MQI[short])
        ) +
          ggplot2::labs(title = "Performance in Time"),
        plot_pi_bar(
          stats_shortterm$by_site,
          "ti_bias",
          stats_shortterm$summary$ti_bias_90,
          bquote(TI[B])
        ),
        plot_pi_bar(
          stats_shortterm$by_site,
          "ti_cor",
          stats_shortterm$summary$ti_cor_90,
          bquote(TI[R])
        ),
        plot_pi_bar(
          stats_shortterm$by_site,
          "ti_sd",
          stats_shortterm$summary$ti_sd_90,
          bquote(TI[sigma])
        )
      )
    } else {
      plots_short <- NULL
    }

    if (!is.null(stats_longterm)) {
      plots_long <- list(
        plot_pi_bar(
          stats_longterm$by_site,
          "mqi",
          stats_longterm$summary$mqi_90,
          bquote(MQI[long])
        ) +
          ggplot2::labs(title = "Performance in Space"),
        plot_pi_bar(
          stats_longterm$by_type,
          "si_rmse",
          max(stats_longterm$by_type$si_rmse),
          bquote(SI[RMSE])
        ),
        plot_pi_bar(
          stats_longterm$by_type,
          "si_bias",
          max(stats_longterm$by_type$si_bias),
          bquote(SI[B])
        ),
        plot_pi_bar(
          stats_longterm$by_type,
          "si_cor",
          max(stats_longterm$by_type$si_cor),
          bquote(SI[R])
        ),
        plot_pi_bar(
          stats_longterm$by_type,
          "si_sd",
          max(stats_longterm$by_type$si_sd),
          bquote(SI[sigma])
        )
      )
    } else {
      plots_long <- NULL
    }

    plts <- list()
    if (!is.null(plots_short)) {
      plts <- append(plts, plots_short)
    }
    if (!is.null(plots_short) && !is.null(plots_long)) {
      plts <- append(plts, list(patchwork::plot_spacer()))
    }
    if (!is.null(plots_long)) {
      plts <- append(plts, plots_long)
    }

    plt <-
      patchwork::wrap_plots(
        ncol = 1,
        plts
      ) +
      patchwork::plot_layout(guides = "collect", axes = "collect") &
      ggplot2::theme(
        legend.position = ifelse(
          dplyr::n_distinct(stats_shortterm$by_site$type) > 1,
          "bottom",
          "none"
        ),
        legend.justification = "left"
      )

    return(plt)
  }

  if (interactive) {
    add_report_plotly <- function(plt, data, showlegend = FALSE) {
      plt |>
        plotly::add_bars(
          inherit = FALSE,
          y = dplyr::distinct(data, .data$name)$name,
          x = rep(2, dplyr::n_distinct(data$name)),
          marker = list(line = list(color = color_outline, width = 1)),
          color = I(grDevices::rgb(0, 0, 0, 0)),
          hoverinfo = "skip",
          showlegend = FALSE,
          colors = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          )
        ) |>
        plotly::add_bars(
          inherit = FALSE,
          y = dplyr::distinct(data, .data$name)$name,
          x = rep(1, dplyr::n_distinct(data$name)),
          color = I(col),
          hoverinfo = "skip",
          showlegend = FALSE,
          colors = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          )
        ) |>
        plotly::add_markers(
          colors = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          ),
          showlegend = showlegend
        ) |>
        plotly::layout(
          barmode = "overlay",
          xaxis = list(
            range = c(-0.1, censor),
            mirror = TRUE,
            title = ""
          ),
          yaxis = list(
            title = "",
            fixedrange = TRUE
          ),
          legend = list(
            orientation = "h",
            y = 1.1
          ),
          showlegend = TRUE
        )
    }

    col <- t(grDevices::col2rgb(color_target))
    col <- grDevices::rgb(
      red = col[1, "red"],
      green = col[1, "green"],
      blue = col[1, "blue"],
      alpha = 255 * 0.15,
      maxColorValue = 255
    )

    if (!is.null(stats_shortterm)) {
      shortdat <-
        stats_shortterm$by_site |>
        dplyr::select(c("site", "type", "mqi", "ti_bias", "ti_cor", "ti_sd")) |>
        tidyr::pivot_longer(c("mqi", "ti_bias", "ti_cor", "ti_sd")) |>
        dplyr::mutate(
          name = factor(
            .data$name,
            rev(c("mqi", "ti_bias", "ti_cor", "ti_sd")),
            rev(c(
              "MQI<sub>short</sub>",
              "TI<sub>B</sub>",
              "TI<sub>R</sub>",
              "TI<sub>SD</sub>"
            ))
          ),
          value = abs(.data$value),
          value = ifelse(.data$value > censor, censor, .data$value)
        )

      plt_short <-
        shortdat |>
        plotly::plot_ly(
          y = ~name,
          x = ~ abs(value),
          color = ~type,
          colors = c(
            "fixed" = color_fixed,
            "indicative" = color_indicative
          ),
          legendgroup = ~type,
          text = ~site
        ) |>
        add_report_plotly(shortdat, showlegend = TRUE)
    } else {
      plt_short <- NULL
    }

    if (!is.null(stats_longterm)) {
      longdat <-
        dplyr::bind_rows(
          dplyr::select(
            stats_longterm$by_site,
            c("site", "type", "mqi")
          ) |>
            tidyr::pivot_longer("mqi"),
          tidyr::pivot_longer(
            data = stats_longterm$by_type,
            -"type"
          )
        ) |>
        dplyr::mutate(
          name = factor(
            .data$name,
            rev(c("mqi", "si_rmse", "si_bias", "si_cor", "si_sd")),
            rev(c(
              "MQI<sub>long</sub>",
              "SI<sub>RMSE</sub>",
              "SI<sub>B</sub>",
              "SI<sub>R</sub>",
              "SI<sub>SD</sub>"
            ))
          ),
          value = abs(.data$value),
          value = ifelse(.data$value > censor, censor, .data$value)
        )

      plt_long <-
        longdat |>
        plotly::plot_ly(
          y = ~name,
          x = ~ abs(value),
          color = ~type,
          text = ~site,
          legendgroup = ~type,
          showlegend = is.null(stats_shortterm)
        ) |>
        add_report_plotly(longdat, showlegend = is.null(stats_shortterm))
    } else {
      plt_long <- NULL
    }

    if (!is.null(plt_short) && !is.null(plt_long)) {
      plt <-
        plotly::subplot(
          shareX = TRUE,
          titleY = TRUE,
          nrows = 2,
          plt_short,
          plt_long,
          heights = c(4 / 9, 5 / 9)
        )
    } else if (!is.null(plt_short)) {
      plt <- plt_short
    } else if (!is.null(plt_long)) {
      plt <- plt_long
    }

    return(plt)
  }
}

#' Plot a bar chart comparing observed (measured) and modelled concentrations
#'
#' This function produces a plot comparing modelled and measured concentrations.
#' For long-term data, the provided annual averaged measured data is visualised
#' alongside the performance acceptability range of the observations. For
#' short-term data, the RMSE and RMSU*0 are visualised.
#'
#' @param stats The output of [summarise_mqo_stats()]. All relevant information
#'   (e.g., `term`, `params_fixed`, etc.) will be passed to this function.
#'
#' @param interactive If `FALSE`, the default, a static `ggplot2` graphic will
#'   be returned which can be saved as a PNG, SVG, or other similar format. If
#'   `TRUE`, a dynamic HTML widget will be returned created by `plotly`.
#'
#' @param color_obs,color_mod,color_outline The colours to use to fill the
#'   'observation' bars, 'modelled' bars, and for the outlines/error bars. Can
#'   be expressed as hex codes, or any colours listed in [colors()].
#'
#' @family plotting functions
#'
#' @author Jack Davison
#'
#' @examples
#' plot_comparison_bars(
#'   summarise_mqo_stats(demo_longterm, pollutant = "PM10")
#' )
#'
#' plot_comparison_bars(
#'   summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
#' )
#'
#' @export
plot_comparison_bars <- function(
  stats,
  color_obs = "grey85",
  color_mod = "black",
  color_outline = "black",
  interactive = FALSE
) {
  validate_mqor_stats(stats)
  if (interactive) {
    rlang::check_installed("plotly")
  }
  term <- stats$inputs$term

  if (term == "long") {
    plotdata <-
      stats$by_site |>
      tidyr::pivot_longer(cols = c("mod", "obs"), names_to = "stat") |>
      dplyr::mutate(
        ar_low = dplyr::if_else(.data$stat == "mod", NA, .data$ar_low),
        ar_up = dplyr::if_else(.data$stat == "mod", NA, .data$ar_up),
        error = dplyr::if_else(
          .data$stat == "mod",
          0,
          .data$ar_up - .data$value
        )
      ) |>
      dplyr::mutate(
        stat = factor(
          .data$stat,
          c("obs", "mod"),
          c("Observations", "Modelled Values")
        )
      )

    if (interactive) {
      # need to split the data, to overcome known issue in {plotly}
      # <https://github.com/plotly/plotly.R/issues/762>
      plotdata_splits <- split(plotdata, plotdata$stat)

      plt <-
        plotly::plot_ly() |>
        plotly::add_trace(
          type = "bar",
          x = plotdata_splits$Observations[["site"]],
          y = plotdata_splits$Observations$value,
          error_y = list(
            array = plotdata_splits$Observations$error,
            color = color_outline
          ),
          name = "Observations",
          color = I(color_obs),
          marker = list(line = list(color = color_outline, width = 1))
        ) |>
        plotly::add_trace(
          type = "bar",
          x = plotdata_splits$`Modelled Values`[["site"]],
          y = plotdata_splits$`Modelled Values`$value,
          name = "Modelled Values",
          color = I(color_mod),
          marker = list(line = list(color = color_outline, width = 1))
        ) |>
        plotly::layout(
          xaxis = list(title = "Sampling Points"),
          yaxis = list(
            title = "Concentrations (&mu;g&nbsp;m<sup>-3</sup>)",
            fixedrange = TRUE
          ),
          legend = list(orientation = "h", y = 1.1),
          showlegend = TRUE
        )
    } else {
      plt <-
        ggplot2::ggplot(
          plotdata,
          ggplot2::aes(x = .data[["site"]], y = .data$value)
        ) +
        ggplot2::geom_col(
          ggplot2::aes(fill = .data$stat),
          position = ggplot2::position_dodge2(width = 2 / 3),
          width = 2 / 3,
          color = color_outline
        ) +
        ggplot2::geom_linerange(
          ggplot2::aes(
            ymax = .data$ar_up,
            ymin = .data$ar_low,
            group = .data$stat
          ),
          na.rm = TRUE,
          position = ggplot2::position_dodge2(width = 2 / 3)
        ) +
        ggplot2::theme_bw(14) +
        ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) +
        ggplot2::scale_fill_manual(values = c(color_obs, color_mod)) +
        ggplot2::theme(legend.position = "top", legend.justification = "left") +
        ggplot2::labs(
          x = "Sampling Points",
          y = format_text("Concentrations (ug/m3)"),
          fill = NULL
        )
    }
  }

  if (term == "short") {
    plotdata <-
      stats$by_site |>
      dplyr::select(dplyr::all_of(c("site", "rmse", "rmsu_star"))) |>
      tidyr::pivot_longer(
        cols = -"site",
        names_to = "stat"
      ) |>
      dplyr::mutate(
        stat = factor(.data$stat, c("rmse", "rmsu_star"))
      )

    if (interactive) {
      plotdata_splits <- split(plotdata, plotdata$stat)

      plt <-
        plotly::plot_ly() |>
        plotly::add_trace(
          type = "bar",
          x = plotdata_splits$rmse[["site"]],
          y = plotdata_splits$rmse$value,
          name = "RMSE",
          color = I(color_obs),
          marker = list(line = list(color = color_outline, width = 1))
        ) |>
        plotly::add_trace(
          type = "bar",
          x = plotdata_splits$rmsu_star[["site"]],
          y = plotdata_splits$rmsu_star$value,
          name = "RMSU<sub>0</sub><sup>*</sup>",
          color = I(color_mod),
          marker = list(line = list(color = color_outline, width = 1))
        ) |>
        plotly::layout(
          xaxis = list(title = "Sampling Points"),
          yaxis = list(
            title = "Concentrations (&mu;g&nbsp;m<sup>-3</sup>)",
            fixedrange = TRUE
          ),
          legend = list(orientation = "h", y = 1.1),
          showlegend = TRUE
        )
    } else {
      plt <-
        ggplot2::ggplot(
          plotdata,
          ggplot2::aes(x = .data[["site"]], y = .data$value)
        ) +
        ggplot2::geom_col(
          ggplot2::aes(fill = .data$stat),
          position = ggplot2::position_dodge2(width = 2 / 3),
          width = 2 / 3,
          color = color_outline
        ) +
        ggplot2::theme_bw(14) +
        ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) +
        ggplot2::scale_fill_manual(
          values = c(color_obs, color_mod),
          labels = c(expression(RMSE), expression(RMSU[O]^"*"))
        ) +
        ggplot2::theme(legend.position = "top", legend.justification = "left") +
        ggplot2::labs(
          x = "Sampling Points",
          y = format_text("Concentration (ug/m3)"),
          fill = NULL
        )
    }
  }

  return(plt)
}

#' Write tables of MQO summary data to a file
#'
#' This function takes the output of [summarise_mqo_stats()] and writes it to a
#' file. Either short-term or long-term data can be written, or both at once.
#'
#' @inheritParams plot_mqi_report
#'
#' @inheritParams data.table::fwrite
#'
#' @param digits Integer indicating the number of decimal places to be used when
#'   rounding numeric values.
#'
#' @param file Output file name. `""` indicates output to the console.
#'
#' @param delim The delimiter used to separate columns in the files. The default
#'   is `";"`.
#'
#' @author Jack Davison
#'
#' @export
write_mqo_stats <- function(
  stats_shortterm = NULL,
  stats_longterm = NULL,
  file = "",
  digits = 2,
  delim = ";"
) {
  validate_mqor_stats(stats_longterm)
  validate_mqor_stats(stats_shortterm)

  fmt <- function(x) {
    stringr::str_replace_all(x, "_", " ") |>
      stringr::str_to_title()
  }

  write_empty_line <- function() {
    data.table::fwrite(as.list(""), file = file, append = TRUE)
  }

  if (file.exists(file)) {
    unlink(file, force = TRUE)
  }

  write_stats <- function(stats) {
    title <- glue::glue(
      "{fmt(stats$inputs$term)}-term {stats$inputs$pollutant} "
    )

    data.table::fwrite(as.list(title), file = file, append = TRUE)
    write_empty_line()

    data.table::fwrite(as.list("Parameters"), file = file, append = TRUE)
    dplyr::bind_rows(
      "Fixed" = stats$inputs$params_fixed,
      "Indicative" = stats$inputs$params_indicative,
      .id = "type"
    ) |>
      data.table::fwrite(
        file = file,
        append = TRUE,
        col.names = TRUE,
        row.names = FALSE,
        sep = delim,
        quote = FALSE
      )

    purrr::walk(
      names(stats)[names(stats) != "inputs"],
      function(x) {
        write_empty_line()
        data.table::fwrite(as.list(fmt(x)), file = file, append = TRUE)
        suppressWarnings(
          data.table::fwrite(
            x = stats[[x]] |>
              dplyr::mutate(dplyr::across(dplyr::where(is.numeric), \(x) {
                round(x, digits = digits)
              })),
            file = file,
            append = TRUE,
            col.names = TRUE,
            row.names = FALSE,
            sep = delim,
            quote = FALSE
          )
        )
      }
    )

    write_empty_line()
  }

  if (!is.null(stats_shortterm)) {
    write_stats(stats_shortterm)
  }
  if (!is.null(stats_longterm)) {
    write_stats(stats_longterm)
  }

  return(invisible(file))
}

#' Quickly plot observed (measured) and modelled concentrations over time in
#' short-term data
#'
#' This is a useful function for examining short-term data *after* it has been
#' filtered or aggregated, but *before* it statistics are calculated. Unlike
#' other plotting functions in `{mqor}`, this function **does not** take the
#' output of [summarise_mqo_stats()].
#'
#' @param data An R `data.frame` containing at least four columns; a numeric
#'   column of observed values, a numeric column of modelled values, a character
#'   or factor column of identifiers that identify the site associated with the
#'   concentrations, and a character or factor column of identifiers that
#'   identify the pollutant being measured/modelled. See [demo_shortterm] for an
#'   example format.
#'
#' @param pollutant The pollutant to plot. Should be a value contained within
#'   `data[[dict$pollutant]]`.
#'
#' @param site The station to plot.s Should be a value contained within
#'   `data[[dict$site]]`.
#'
#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary
#'   to specify the columns in the data `{mqor}` should use.
#'
#' @param color_obs,color_mod The colours to use for the 'observation' and
#'   'modelled' lines. Can be expressed as hex codes, or any colours listed in
#'   [colors()].
#'
#' @inheritParams plot_comparison_bars
#'
#' @family plotting functions
#'
#' @author Jack Davison
#'
#' @export
plot_timeseries <- function(
  data,
  pollutant,
  site,
  color_obs = "#6CC5B0",
  color_mod = "black",
  dict = mqor::mqo_dict(),
  interactive = FALSE
) {
  pollutant <- rlang::arg_match(
    pollutant,
    unique(as.character(data[[dict$pollutant]]))
  )

  data <- data[
    data[[dict$pollutant]] == pollutant & data[[dict$site]] == site,
  ]

  if (nrow(data) == 0L) {
    cli::cli_warn(
      "No data returned; please check {.field pollutant} and {.field site}."
    )
    return(NULL)
  }

  plotdata <-
    tidyr::pivot_longer(
      data,
      cols = dplyr::all_of(c(dict$obs, dict$mod)),
      names_to = "mqor_name",
      values_to = "mqor_value"
    )

  if (interactive) {
    plotdata$mqor_name[plotdata$mqor_name == "obs"] <- "Observations"
    plotdata$mqor_name[plotdata$mqor_name == "mod"] <- "Modelled Values"

    plt <-
      plotly::plot_ly() |>
      plotly::add_trace(
        x = plotdata[[dict$date]],
        y = plotdata$mqor_value,
        color = plotdata$mqor_name,
        colors = stats::setNames(
          c(color_mod, color_obs),
          c("Modelled Values", "Observations")
        ),
        type = "scatter",
        mode = "lines+markers"
      ) |>
      plotly::layout(
        yaxis = list(title = toupper(data[[dict$pollutant]][1])),
        xaxis = list(title = ""),
        legend = list(
          orientation = "h",
          y = 1.1
        ),
        hovermode = "x unified"
      )
  }

  if (!interactive) {
    plt <-
      plotdata |>
      ggplot2::ggplot(ggplot2::aes(
        x = .data[[dict$date]],
        y = .data$mqor_value
      )) +
      ggplot2::geom_line(ggplot2::aes(color = .data$mqor_name)) +
      ggplot2::geom_point(ggplot2::aes(color = .data$mqor_name)) +
      ggplot2::theme_bw(14) +
      ggplot2::theme(legend.position = "top", legend.justification = "left") +
      ggplot2::labs(
        x = NULL,
        y = toupper(data[[dict$pollutant]][1]),
        color = NULL
      ) +
      ggplot2::scale_color_manual(
        values = stats::setNames(
          c(color_mod, color_obs),
          c(dict$mod, dict$obs)
        ),
        breaks = c(dict$obs, dict$mod),
        labels = c("Observations", "Modelled Values")
      )
  }

  return(plt)
}

#' Create an interactive table of MQO summary data
#'
#' This function takes the output of [summarise_mqo_stats()] and creates an
#' interactive table of the outputs. The output table is nested per object,
#'
#' @inheritParams plot_comparison_bars
#'
#' @param expanded Initialise the widget as expanded? Defaults to `TRUE`. Passed
#'   to [reactable::reactable()]'s `defaultExpanded` argument.
#'
#' @param color_pass,color_fail,color_fixed,color_indicative These colours are
#'   used to colour various cells in the output. Can be expressed as hex codes,
#'   or any colours listed in [colors()].
#'
#' @returns a `reactable::reactable()` object
#'
#' @examples
#' tabulate_mqo_stats(
#'   summarise_mqo_stats(demo_longterm, pollutant = "PM10")
#' )
#'
#' tabulate_mqo_stats(
#'   summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
#' )
#'
#' @export
tabulate_mqo_stats <- function(
  stats,
  expanded = TRUE,
  color_pass = "#136F63",
  color_fail = "#840032",
  color_fixed = "#4269D0",
  color_indicative = "#EFB118"
) {
  validate_mqor_stats(stats)
  rlang::check_installed("reactable")

  style_lt_one <- make_style_lt_one(
    color_pass = color_pass,
    color_fail = color_fail
  )

  style_type <- make_style_type(
    color_fixed = color_fixed,
    color_indicative = color_indicative
  )

  tblname <- glue::glue(
    "MQO Statistics: {stats$inputs$pollutant}, {stats$inputs$term} term"
  )

  term <- stats$inputs$term

  if (term == "short") {
    tbl <- dplyr::tibble(
      x = c("Overall Summary", "Stats by Site", "Parameters")
    ) |>
      stats::setNames(tblname) |>
      reactable::reactable(
        defaultExpanded = expanded,
        sortable = FALSE,
        searchable = FALSE,
        filterable = FALSE,
        details = function(index) {
          if (index == 1) {
            tabulate_short_summary(stats$summary, style_lt_one = style_lt_one)
          } else if (index == 2) {
            tabulate_short_by_site(
              stats$by_site,
              style_lt_one = style_lt_one,
              style_type = style_type
            )
          } else if (index == 3) {
            tabulate_inputs(
              stats$inputs,
              term = term
            )
          }
        }
      )
  }

  if (term == "long") {
    tbl <-
      dplyr::tibble(
        x = c("Overall Summary", "Stats by Type", "Stats by Site", "Parameters")
      ) |>
      stats::setNames(tblname) |>
      reactable::reactable(
        defaultExpanded = expanded,
        sortable = FALSE,
        searchable = FALSE,
        filterable = FALSE,
        details = function(index) {
          if (index == 1) {
            tabulate_long_summary(stats$summary, style_lt_one = style_lt_one)
          } else if (index == 2) {
            tabulate_long_by_type(
              stats$by_type,
              style_lt_one = style_lt_one,
              style_type = style_type
            )
          } else if (index == 3) {
            tabulate_long_by_site(
              stats$by_site,
              style_lt_one = style_lt_one,
              style_type = style_type
            )
          } else if (index == 4) {
            tabulate_inputs(stats$inputs, term = term)
          }
        }
      )
  }
  return(tbl)
}

#' @noRd
tabulate_short_summary <- function(data, style_lt_one) {
  reactable::reactable(
    dplyr::relocate(data, "mqo"),
    outlined = TRUE,
    defaultColDef = reactable::colDef(
      html = TRUE,
      format = reactable::colFormat(digits = 2)
    ),
    pagination = FALSE,
    columns = list(
      mqo = reactable::colDef(
        "MQO",
        align = "right",
        cell = function(value) {
          ifelse(value, paste("Pass ", svg_check), paste("Fail ", svg_cross))
        },
        style = style_lt_one
      ),
      mqi_90 = reactable::colDef(
        "MQI<sub>90th</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      ti_bias_90 = reactable::colDef(
        "TI<sub>B,90th</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      ti_cor_90 = reactable::colDef(
        "TI<sub>R,90th</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      ti_sd_90 = reactable::colDef(
        "TI<sub>&sigma;,90th</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      )
    ),
    columnGroups = list(
      reactable::colGroup(
        name = "Temporal Indicators",
        columns = c("ti_bias_90", "ti_cor_90", "ti_sd_90")
      )
    )
  )
}

#' @noRd
tabulate_short_by_site <- function(data, style_lt_one, style_type) {
  reactable::reactable(
    dplyr::relocate(data, dplyr::starts_with("sd_"), .after = "mean_mod"),
    outlined = TRUE,
    defaultColDef = reactable::colDef(
      html = TRUE,
      align = "right",
      vAlign = "center",
      format = reactable::colFormat(digits = 2)
    ),
    pagination = FALSE,
    columns = list(
      site = reactable::colDef("Site", align = "right"),
      type = reactable::colDef(
        "Type",
        align = "right",
        cell = cell_type,
        style = style_type
      ),
      mean_obs = reactable::colDef(
        '<span style="text-decoration: overline;">O</span>'
      ),
      mean_mod = reactable::colDef(
        '<span style="text-decoration: overline;">M</span>'
      ),
      bias = reactable::colDef("BIAS"),
      rmse = reactable::colDef("RMSE"),
      rmsu = reactable::colDef("RMSU"),
      rmsu_star = reactable::colDef("RMSU*"),
      r = reactable::colDef("COR."),
      sd_mod = reactable::colDef("&sigma;<sub>M</sub>"),
      sd_obs = reactable::colDef("&sigma;<sub>O</sub>"),
      crmse = reactable::colDef("CRMSE"),
      mqi = reactable::colDef("MQI", cell = cell_lt_one, style = style_lt_one),
      ti_bias = reactable::colDef(
        "TI<sub>B</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      ti_cor = reactable::colDef(
        "TI<sub>R</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      ti_sd = reactable::colDef(
        "TI<sub>&sigma;</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      ti_crmse = reactable::colDef(
        "TI<sub>CRMSE</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      )
    ),
    columnGroups = list(
      reactable::colGroup(
        name = "Temporal Indicators",
        columns = c("ti_bias", "ti_cor", "ti_sd", "ti_crmse")
      )
    )
  )
}

#' @noRd
tabulate_long_summary <- function(data, style_lt_one) {
  reactable::reactable(
    dplyr::relocate(data, "mqo", "mqi_90"),
    outlined = TRUE,
    defaultColDef = reactable::colDef(
      html = TRUE,
      format = reactable::colFormat(digits = 2)
    ),
    pagination = FALSE,
    columns = list(
      mqo = reactable::colDef(
        "MQO",
        align = "right",
        cell = function(value) {
          ifelse(value, paste("Pass ", svg_check), paste("Fail ", svg_cross))
        },
        style = style_lt_one
      ),
      mqi_90 = reactable::colDef(
        "MQI<sub>90th</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      bias = reactable::colDef("BIAS"),
      rmse = reactable::colDef("RMSE"),
      r = reactable::colDef("COR."),
      sd_mod = reactable::colDef("&sigma;<sub>M</sub>"),
      sd_obs = reactable::colDef("&sigma;<sub>O</sub>"),
      rmsu = reactable::colDef("RMSU")
    )
  )
}

#' @noRd
tabulate_long_by_type <- function(data, style_lt_one, style_type) {
  reactable::reactable(
    data,
    outlined = TRUE,
    defaultColDef = reactable::colDef(
      html = TRUE,
      format = reactable::colFormat(digits = 2)
    ),
    pagination = FALSE,
    columns = list(
      type = reactable::colDef(
        "Type",
        align = "right",
        cell = cell_type,
        style = style_type
      ),
      si_bias = reactable::colDef(
        "SI<sub>B</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      si_cor = reactable::colDef(
        "SI<sub>R</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      si_sd = reactable::colDef(
        "SI<sub>&sigma;</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      ),
      si_rmse = reactable::colDef(
        "SI<sub>RMSE</sub>",
        cell = cell_lt_one,
        style = style_lt_one
      )
    ),
    columnGroups = list(
      reactable::colGroup(
        name = "Spatial Indicators",
        columns = c("si_bias", "si_cor", "si_sd", "si_rmse")
      )
    )
  )
}

#' @noRd
tabulate_long_by_site <- function(data, style_lt_one, style_type) {
  reactable::reactable(
    dplyr::relocate(data, "mqi", .after = "type"),
    outlined = TRUE,
    defaultColDef = reactable::colDef(
      html = TRUE,
      format = reactable::colFormat(digits = 2)
    ),
    pagination = FALSE,
    columns = list(
      site = reactable::colDef("Site", align = "right"),
      type = reactable::colDef(
        "Type",
        align = "right",
        cell = cell_type,
        style = style_type
      ),
      obs = reactable::colDef(
        '<span style="text-decoration: overline;">O</span>'
      ),
      mod = reactable::colDef(
        '<span style="text-decoration: overline;">M</span>'
      ),
      uncer = reactable::colDef(
        "U(&#333;)"
      ),
      ar_low = reactable::colDef(
        "AR<sub>low</sub>"
      ),
      ar_up = reactable::colDef(
        "AR<sub>up</sub>"
      ),
      mqi = reactable::colDef("MQI", cell = cell_lt_one, style = style_lt_one)
    )
  )
}

#' @noRd
tabulate_inputs <- function(data, term) {
  tibble::tibble(term = data$term, pollutant = data$pollutant) |>
    dplyr::bind_cols(
      data$params_fixed |>
        dplyr::rename_with(.fn = \(x) paste0("f_", x))
    ) |>
    dplyr::bind_cols(
      data$params_indicative |>
        dplyr::rename_with(.fn = \(x) paste0("i_", x))
    ) |>
    dplyr::select(-"i_rv") |>
    reactable::reactable(
      outlined = TRUE,
      filterable = FALSE,
      searchable = FALSE,
      pagination = FALSE,
      defaultColDef = reactable::colDef(
        align = "right",
        html = TRUE
      ),
      columns = list(
        term = reactable::colDef("Term", align = "right"),
        pollutant = reactable::colDef("Pollutant", align = "right"),
        f_rv = reactable::colDef(glue::glue("RV<sub>{term}</sub>")),
        f_u_rv = reactable::colDef(glue::glue(
          "U<sub>0r</sub>(RV<sub>{term}</sub>)"
        )),
        f_a = reactable::colDef(glue::glue("&alpha;<sub>{term}</sub>")),
        f_b = reactable::colDef(glue::glue("&beta;<sub>{term}</sub>")),
        i_u_rv = reactable::colDef(glue::glue(
          "U<sub>0r</sub>(RV<sub>{term}</sub>)"
        )),
        i_a = reactable::colDef(glue::glue("&alpha;<sub>{term}</sub>")),
        i_b = reactable::colDef(glue::glue("&beta;<sub>{term}</sub>"))
      ),
      columnGroups = list(
        reactable::colGroup(
          "Fixed",
          align = "right",
          columns = c("f_u_rv", "f_a", "f_b"),
        ),
        reactable::colGroup(
          "Indicative",
          align = "right",
          columns = c("i_u_rv", "i_a", "i_b")
        )
      )
    )
}

#' @noRd
cell_lt_one <- function(value) {
  ifelse(
    value <= 1,
    paste(round(value, 2), svg_check),
    paste(round(value, 2), svg_cross)
  )
}

#' @noRd
make_style_lt_one <- function(color_pass, color_fail) {
  function(value) {
    if (is.logical(value)) {
      color <- ifelse(value, color_pass, color_fail)
    } else {
      color <- ifelse(value <= 1, color_pass, color_fail)
    }
    list(color = color, fontWeight = "bold")
  }
}

#' @noRd
cell_type <- function(value) {
  value <- stringr::str_to_title(value)
  ifelse(value == "Fixed", paste(value, svg_building), paste(value, svg_box))
}

#' @noRd
make_style_type <- function(color_fixed, color_indicative) {
  function(value) {
    color <- ifelse(value == "fixed", color_fixed, color_indicative)
    list(color = color)
  }
}

svg_check <-
  "<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-check-circle \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z\"></path>\n<path d=\"M10.97 4.97a.235.235 0 0 0-.02.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-1.071-1.05z\"></path></svg>"

svg_cross <-
  "<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-x-circle \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z\"></path>\n<path d=\"M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z\"></path></svg>"

svg_building <-
  "<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-building \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M4 2.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3 0a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1ZM4 5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1ZM7.5 5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Zm2.5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1ZM4.5 8a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Zm2.5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Z\"></path>\n<path d=\"M2 1a1 1 0 0 1 1-1h10a1 1 0 0 1 1 1v14a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V1Zm11 0H3v14h3v-2.5a.5.5 0 0 1 .5-.5h3a.5.5 0 0 1 .5.5V15h3V1Z\"></path></svg>"

svg_box <-
  "<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-box \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8.186 1.113a.5.5 0 0 0-.372 0L1.846 3.5 8 5.961 14.154 3.5 8.186 1.113zM15 4.239l-6.5 2.6v7.922l6.5-2.6V4.24zM7.5 14.762V6.838L1 4.239v7.923l6.5 2.6zM7.443.184a1.5 1.5 0 0 1 1.114 0l7.129 2.852A.5.5 0 0 1 16 3.5v8.662a1 1 0 0 1-.629.928l-7.185 2.874a.5.5 0 0 1-.372 0L.63 13.09a1 1 0 0 1-.63-.928V3.5a.5.5 0 0 1 .314-.464L7.443.184z\"></path></svg>"

#' Filter a dataset by temporal characteristics
#'
#' These functions filter a dataset using features of the "date" column, as
#' defined by [mqo_dict()]. Current options include the year, the month of the
#' year, the day of the week, and the hour of the day. These options allow users
#' to flexibly filter their data; for example, returning only weekday evenings
#' in Summer months. All but [filter_year()] require short-term data;
#' [filter_year()] will work out whether the data is short- or long-term based
#' on the data type of the "date" column.
#'
#' @inheritParams summarise_mqo_stats
#'
#' @param years,months,wdays,hours A numeric vector indicating the years (any
#'   intger), months of the year (`1` to `12`), days of the week (`1` to `7`,
#'   where `1` is Sunday), or hours of the day (`1` to `24`) to retain in the
#'   data.
#'
#' @family data utilities
#'
#' @rdname filter-time
#' @export
filter_year <- function(data, years, dict = mqor::mqo_dict()) {
  if (is.numeric(data[[dict$date]])) {
    id <- data[[dict$date]] %in% years
  } else {
    id <- lubridate::year(data[[dict$date]]) %in% years
  }
  return(data[id, ])
}

#' @rdname filter-time
#' @export
filter_month <- function(data, months = 1:12, dict = mqor::mqo_dict()) {
  validate_filter_inputs(months, 1:12, "months")
  id <- lubridate::month(data[[dict$date]]) %in% months
  return(data[id, ])
}

#' @rdname filter-time
#' @export
filter_wday <- function(data, wdays = 1:7, dict = mqor::mqo_dict()) {
  validate_filter_inputs(wdays, 1:7, "wdays")
  id <- lubridate::wday(data[[dict$date]], week_start = 7) %in% wdays
  return(data[id, ])
}

#' @rdname filter-time
#' @export
filter_hour <- function(data, hours = 0:23, dict = mqor::mqo_dict()) {
  validate_filter_inputs(hours, 0:23, "hours")
  id <- lubridate::hour(data[[dict$date]]) %in% hours
  return(data[id, ])
}

#' @noRd
validate_filter_inputs <- function(x, allowed, field) {
  error <- x[!x %in% allowed]
  if (length(error) > 0 || !rlang::is_integerish(x)) {
    cli::cli_abort(c(
      "{.field {field}} should only cover the integer values {min{allowed}} to {max{allowed}}.",
      "Bad values: {error}"
    ))
  }
}

#' Get path to `{mqor}` example files
#'
#' `{mqor}` comes bundled with some sample datasets in its `inst/demo_files`
#' directory. These are identical to [demo_shortterm] and [demo_longterm], and
#' can be read using [read_mqor()]. This function makes these files easy to
#' access.
#'
#' @param file Path to file. If `NULL`, the example files will be listed.
#' @seealso [demo_shortterm], [download_demo_files()]
#' @export
#' @examples
#' demo_files()
#' demo_files("demo_longterm.csv")
demo_files <- function(file = NULL) {
  opts <- dir(system.file("demo_files", package = "mqor"))
  if (is.null(file)) {
    return(opts)
  } else {
    file <- rlang::arg_match(file, opts, multiple = FALSE)
    system.file("demo_files", file, package = "mqor", mustWork = TRUE)
  }
}

#' Download larger example files from GitLab
#'
#' As well as the simple files found via [demo_files()], `{mqor}` provides more
#' complex data which is too large to bundle with an R package. This function
#' downloads these files to a directory. These files are the demo dataset found
#' in the DELTA tool, reformatted for [read_mqor()].
#'
#' @param dir The destination directory for the files.
#'
#' @param files A vector for the specific files to download.
#'
#' @seealso [demo_files()]
#'
#' @export
download_demo_files <- function(
  dir,
  files = c("delta_attributes.csv", "delta_shortterm.csv", "delta_longterm.csv")
) {
  urlstr <- "https://code.europa.eu/jrcairqualitymodelling/mqor/-/raw/main/data-delta/FILE?ref_type=heads"

  files <- rlang::arg_match(files, multiple = TRUE)

  if (!dir.exists(dir)) {
    dir.create(dir)
  }
  for (i in files) {
    utils::download.file(
      url = gsub("FILE", i, urlstr),
      destfile = file.path(dir, i)
    )
  }
}

#' Simple demonstration datasets taken from the CEN technical specification
#'
#' These two datasets contain dummy data outlined in Annex G of *Ambient Air —
#' Definition and use of modelling quality objectives for air quality
#' assessment*. These data are mostly useful for demonstrating `mqor`
#' functionality for teaching and learning purposes. `demo_shortterm` should be
#' used with `mqo_params(term = "short")` and `demo_longterm` with
#' `mqo_params(term = "long")`. Note that `demo_shortterm` only has 5 rows of
#' data for expedience; real datasets should have periodic data representing a
#' whole year at least.
#'
#' \describe{
#'   \item{site}{Site identifiers; S1 through S15.}
#'   \item{type}{Site type; all are 'fixed' butcould also be 'indicative'.}
#'   \item{param}{The pollutant (in this case "PM10")}
#'   \item{date}{The datetime of the measurement. An integer in `demo_longterm`, and a Date in `demo_shortterm`.}
#'   \item{obs}{Observed (measured) values.}
#'   \item{mod}{Modelled values.}
#' }
#'
#' @rdname demo-data
#' @seealso [demo_files()]
#' @order 1
#' @source Ambient Air — Definition and use of modelling quality objectives for
#'   air quality assessment (TC 264 WI 00264248:2025(E))
#'
#' @examples
#' demo_shortterm
"demo_shortterm"

#' @rdname demo-data
#' @order 2
#'
#' @examples
#' demo_longterm
"demo_longterm"

#' Format DELTA-formatted files in a way needed by `{mqor}`
#'
#' This function ingests monitoring and modeling data, as well as site metadata
#' contained within the `startup.ini` file, and returns a `data.frame` in an
#' appropriate format for other `{mqor}` functions (e.g.,
#' [summarise_mqo_stats()]).
#'
#' @param monitoring A `data.frame` imported using [read_delta_data_delim()]
#'   containing monitoring data.
#'
#' @param modeling One or more `data.frames` imported using
#'   [read_delta_data_delim()] or [read_delta_data_cdf()] containing modeling
#'   data. Multiple objects should be provided as a list.
#'
#' @param startup The object returned by [read_delta_resource()] when `file =
#'   "startup_*.ini"`
#'
#' @param data_type Whether to label the DELTA input data as representing
#'   `"fixed"` or `"indicative"` measurements. Defaults to `"fixed"`.
#'
#' @export
fmt_delta_for_mqor <- function(
  monitoring,
  modeling,
  startup,
  data_type = c("fixed", "indicative")
) {
  data_type <- rlang::arg_match(data_type)

  joined <-
    dplyr::left_join(
      monitoring,
      dplyr::bind_rows(modeling),
      dplyr::join_by("date", "site", "param")
    )

  meta <- startup$monitoring
  names(meta)[names(meta) == "station_name"] <- "site"

  joined <-
    dplyr::left_join(joined, meta, dplyr::join_by("site")) |>
    dplyr::mutate(type = data_type, .after = "site")

  return(joined)
}

#' Launch the `{mqor}` Shiny Interface
#'
#' This app runs [shiny::shinyAppDir()] to launching a web interface built into
#' `{mqor}` which allows users to use certain functions without needing to write
#' any R scripts.
#'
#' @inheritParams shiny::shinyAppDir
#'
#' @author Jack Davison
#'
#' @export
launch_app <- function(options = list()) {
  rlang::check_installed(c(
    "shiny",
    "bslib",
    "plotly",
    "leaflet",
    "rlang",
    "reactable"
  ))
  shiny::shinyAppDir(
    appDir = system.file("shiny", package = "mqor"),
    options = options
  )
}

1		#' Quickly summarise observed (measured) and modelled concentrations for MQO
2		#' evaluation
3		#'
4		#' This function calculates all of the model quality objective indictors (e.g.,
5		#' [vec_mqi()]) and all of the complementary indicators (e.g.,
6		#' [vec_pi_bias()]) for a given dataset. Further relevant summaries are also
7		#' calculated; for example, the 90th percentile of `mqi` values, `mqi_90`, is
8		#' calculated using [mqo_percentile()]. Two or three datasets are returned; an
9		#' overall `summary` dataset, a `by_site` site-wise summary, and - for long-term
10		#' data - a `by_type` split fixed/indicative summary.
11		#'
12		#' @param data An R `data.frame` containing at least five columns; a numeric
13		#' column of observed values, a numeric column of modelled values, a character
14		#' or factor column of identifiers that identify the site associated with the
15		#' concentrations, a character or factor column of identifiers that identify
16		#' the pollutant being measured/modelled, and a character or factor column
17		#' containing just `"fixed"` or `"indicative"` to label each site. See
18		#' [demo_shortterm] for an example format.
19		#'
20		#' @param pollutant The pollutant to calculate model quality objectives for.
21		#' Should be a value contained within `data[[dict$pollutant]]`.
22		#'
23		#' @param term Either `"short"` or `"long"`, identifying whether `data`
24		#' represents Short- or Long-term data. The default, `NULL`, will assume this
25		#' from the `data` by checking whether `data[[dict$site]]` contains duplicate
26		#' values.
27		#'
28		#' @param params_fixed,params_indicative See [mqo_params()] for more
29		#' information. Defines fixed and indicative parameters. By default, `{mqor}`
30		#' attempts to sample appropriate values from [default_params] using `term`
31		#' and `pollutant`.
32		#'
33		#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary
34		#' to specify the columns in the data `{mqor}` should use.
35		#'
36		#' @param table By default this function returns a list of different data
37		#' objects. `table` allows a user to select just one. Can be one of
38		#' `"summary"`, `"by_type"`, `"by_site"` or `"inputs"`. Note that, if `table`
39		#' is provided, the output cannot be then fed into [plot_mqi_scatter()] and
40		#' similar functions.
41		#'
42		#' @returns a list with at most the objects `summary` showing whole data
43		#' summaries (e.g., `mqi_90`), `by_type` which shows per-fixed/indicative
44		#' values, `by_site` which shows per-site values (e.g., `mqi`), and `inputs`
45		#' which give the input parameters. The columns in each vary depending on the
46		#' value of `term`. Just one of these can be selected using the `table`
47		#' argument.
48		#'
49		#' @family model performance indicators
50		#'
51		#' @examples
52		#' summarise_mqo_stats(
53		#' demo_longterm,
54		#' pollutant = "PM10"
55		#' )
56		#'
57		#' summarise_mqo_stats(
58		#' demo_shortterm,
59		#' pollutant = "PM10"
60		#' )
61		#'
62		#' @author Jack Davison
63		#'
64		#' @export
65		summarise_mqo_stats <- function(
66		data,
67		pollutant,
68		term = NULL,
69		params_fixed = NULL,
70		params_indicative = NULL,
71		dict = mqor::mqo_dict(),
72		table = NULL
73		) {
74	6x	pollutant <- rlang::arg_match(
75	6x	pollutant,
76	6x	unique(as.character(data[[dict$pollutant]]))
77		)
78	6x	data <- data[data[[dict$pollutant]] == pollutant, ]
79
80	6x	data <-
81	6x	dplyr::filter(
82	6x	data,
83	6x	dplyr::if_else(
84	6x	all(is.na(.data[[dict$obs]])) \| all(is.na(.data[[dict$mod]])),
85	6x	true = FALSE,
86	6x	false = TRUE
87		),
88	6x	.by = c(dict$site)
89		)
90
91	6x	term <- determine_term(data, term, dict)
92
93	6x	if (missing(params_fixed)) {
94	6x	res <- determine_resolution(data, term, dict)
95
96	6x	params_fixed <-
97	6x	mqo_params_default(
98	6x	term = term,
99	6x	type = "fixed",
100	6x	pollutant = pollutant,
101	6x	resolution = res
102		)
103		}
104
105	6x	if (missing(params_indicative)) {
106	6x	res <- determine_resolution(data, term, dict)
107
108	6x	params_indicative <-
109	6x	mqo_params_default(
110	6x	term = term,
111	6x	type = "indicative",
112	6x	pollutant = pollutant,
113	6x	resolution = res
114		)
115		}
116
117	6x	data_splits <- split(data, data[[dict$type]])
118
119	6x	if (term == "short") {
120	3x	site_stats <- calculate_split_stats(
121	3x	fun = calc_shortterm_site_stats,
122	3x	data = data,
123	3x	data_splits = data_splits,
124	3x	term = term,
125	3x	params_fixed = params_fixed,
126	3x	params_indicative = params_indicative,
127	3x	obs = dict$obs,
128	3x	mod = dict$mod,
129	3x	site = dict$site,
130	3x	type = dict$type
131		)
132
133	3x	group_stats <-
134	3x	site_stats \|>
135	3x	dplyr::summarise(
136	3x	mqi_90 = mqo_percentile(.data$mqi, na.rm = TRUE),
137	3x	ti_bias_90 = mqo_percentile(abs(.data$ti_bias), na.rm = TRUE),
138	3x	ti_cor_90 = mqo_percentile(abs(.data$ti_cor), na.rm = TRUE),
139	3x	ti_sd_90 = mqo_percentile(abs(.data$ti_sd), na.rm = TRUE)
140		) \|>
141	3x	dplyr::mutate(mqo = .data$mqi_90 <= 1, .after = "mqi_90")
142		}
143
144	6x	if (term == "long") {
145	3x	site_stats <- calculate_split_stats(
146	3x	fun = calc_longterm_site_stats,
147	3x	data = data,
148	3x	data_splits = data_splits,
149	3x	term = term,
150	3x	params_fixed = params_fixed,
151	3x	params_indicative = params_indicative,
152	3x	obs = dict$obs,
153	3x	mod = dict$mod,
154	3x	site = dict$site,
155	3x	type = dict$type
156		)
157
158	3x	type_stats <- calculate_split_stats(
159	3x	fun = calc_longterm_type_stats,
160	3x	data = data,
161	3x	data_splits = data_splits,
162	3x	term = term,
163	3x	params_fixed = params_fixed,
164	3x	params_indicative = params_indicative,
165	3x	obs = dict$obs,
166	3x	mod = dict$mod,
167	3x	site = dict$site,
168	3x	type = dict$type
169		)
170
171	3x	group_stats <-
172	3x	dplyr::summarise(
173	3x	data,
174	3x	bias = vec_bias(
175	3x	obs = .data[[dict$obs]],
176	3x	mod = .data[[dict$mod]],
177	3x	na.rm = TRUE
178		),
179	3x	rmse = vec_rmse(
180	3x	obs = .data[[dict$obs]],
181	3x	mod = .data[[dict$mod]],
182	3x	na.rm = TRUE
183		),
184	3x	r = vec_cor(
185	3x	obs = .data[[dict$obs]],
186	3x	mod = .data[[dict$mod]],
187	3x	na.rm = TRUE
188		),
189	3x	sd_obs = vec_sd(
190	3x	x = .data[[dict$obs]],
191	3x	term = term,
192	3x	na.rm = TRUE
193		),
194	3x	sd_mod = vec_sd(
195	3x	x = .data[[dict$mod]],
196	3x	term = term,
197	3x	na.rm = TRUE
198		)
199		)
200
201	3x	uncer <- list()
202	3x	if ("fixed" %in% names(data_splits)) {
203	3x	uncer <-
204	3x	append(
205	3x	uncer,
206	3x	list(dplyr::mutate(
207	3x	data_splits$fixed,
208	3x	uncer = scalar_uncer(
209	3x	.data[[dict$obs]],
210	3x	term = term,
211	3x	params = params_fixed
212		)
213		))
214		)
215		}
216	3x	if ("indicative" %in% names(data_splits)) {
217	2x	uncer <-
218	2x	append(
219	2x	uncer,
220	2x	list(
221	2x	dplyr::mutate(
222	2x	data_splits$indicative,
223	2x	uncer = scalar_uncer(
224	2x	.data[[dict$obs]],
225	2x	term = term,
226	2x	params = params_indicative
227		)
228		)
229		)
230		)
231		}
232	3x	rmsu <-
233	3x	uncer \|>
234	3x	dplyr::bind_rows() \|>
235	3x	dplyr::summarise(rmsu = vec_rmsu_uncer(uncer, na.rm = TRUE))
236
237	3x	group_stats$rmsu <- rmsu$rmsu
238	3x	group_stats$mqi_90 <- mqo_percentile(site_stats$mqi, na.rm = TRUE)
239	3x	group_stats$mqo <- group_stats$mqi_90 <= 1
240		}
241
242	6x	site_stats[[dict$site]] <- factor(
243	6x	site_stats[[dict$site]],
244	6x	unique(data[[dict$site]])
245		)
246	6x	site_stats <- dplyr::arrange(site_stats, .data[[dict$site]])
247
248	6x	out <- list(summary = group_stats)
249	6x	if (term == "long") {
250	3x	out <- append(out, list(by_type = type_stats))
251		}
252	6x	out <- append(out, list(by_site = site_stats))
253
254		# rename for consistency
255	6x	for (i in seq_along(out)) {
256	15x	for (j in seq_along(dict)) {
257	90x	names(out[[i]])[names(out[[i]]) == dict[[j]]] <- names(dict)[[j]]
258		}
259		}
260
261		# append input info
262	6x	out <- append(
263	6x	out,
264	6x	list(
265	6x	inputs = list(
266	6x	term = term,
267	6x	pollutant = pollutant,
268	6x	params_fixed = enframe_params(params_fixed),
269	6x	params_indicative = enframe_params(params_indicative)
270		)
271		)
272		)
273
274		# return specific table?
275	6x	if (!is.null(table)) {
276	!	rlang::arg_match(table, names(out), multiple = FALSE)
277	!	out <- out[[table]]
278		} else {
279	6x	class(out) <- "mqor_stats"
280		}
281
282	6x	return(out)
283		}
284
285		#' @noRd
286		enframe_params <- function(params) {
287	12x	tbl <- dplyr::as_tibble(params)
288	12x	if (nrow(tbl) == 0L) {
289	!	tbl <- dplyr::tibble(
290	!	rv = NA_real_,
291	!	u_rv = NA_real_,
292	!	a = NA_real_,
293	!	b = NA_real_
294		)
295		}
296	12x	return(tbl)
297		}
298
299		#' Helper to take data splits from parent function, apply different parmas to
300		#' each, bind and return.
301		#' @noRd
302		calculate_split_stats <- function(
303		fun,
304		data,
305		data_splits,
306		term,
307		params_fixed,
308		params_indicative,
309		obs,
310		mod,
311		site,
312		type
313		) {
314	9x	site_stats <- list()
315
316	9x	if ("fixed" %in% names(data_splits)) {
317	9x	if (is.null(params_fixed)) {
318	!	cli::cli_abort(
319	!	"'fixed' data has been identified; please provide parameters via {.field params_fixed}."
320		)
321		}
322
323	9x	site_stats <-
324	9x	append(
325	9x	site_stats,
326	9x	list(
327	9x	fun(
328	9x	data = data_splits$fixed,
329	9x	params = params_fixed,
330	9x	term = term,
331	9x	obs = obs,
332	9x	mod = mod,
333	9x	site = site,
334	9x	type = type
335		)
336		)
337		)
338		}
339
340	9x	if ("indicative" %in% names(data_splits)) {
341	6x	if (is.null(params_indicative)) {
342	!	cli::cli_abort(
343	!	"'indicative' data has been identified; please provide parameters via {.field params_indicative}."
344		)
345		}
346
347	6x	site_stats <-
348	6x	append(
349	6x	site_stats,
350	6x	list(
351	6x	fun(
352	6x	data = data_splits$indicative,
353	6x	params = params_indicative,
354	6x	term = term,
355	6x	obs = obs,
356	6x	mod = mod,
357	6x	site = site,
358	6x	type = type
359		)
360		)
361		)
362		}
363
364	9x	site_stats <- dplyr::bind_rows(site_stats)
365
366	9x	return(site_stats)
367		}
368
369		#' Helper for short-term site stats
370		#' @noRd
371		calc_shortterm_site_stats <- function(
372		data,
373		term,
374		params,
375		obs,
376		mod,
377		site,
378		type
379		) {
380	5x	dplyr::summarise(
381	5x	data,
382	5x	mean_obs = mean(.data[[obs]], na.rm = TRUE),
383	5x	mean_mod = mean(.data[[mod]], na.rm = TRUE),
384	5x	bias = vec_bias(
385	5x	obs = .data[[obs]],
386	5x	mod = .data[[mod]],
387	5x	na.rm = TRUE
388		),
389	5x	rmse = vec_rmse(
390	5x	obs = .data[[obs]],
391	5x	mod = .data[[mod]],
392	5x	na.rm = TRUE
393		),
394	5x	rmsu = vec_rmsu(
395	5x	obs = .data[[obs]],
396	5x	term = term,
397	5x	params = params,
398	5x	na.rm = TRUE
399		),
400	5x	r = vec_cor(
401	5x	obs = .data[[obs]],
402	5x	mod = .data[[mod]],
403	5x	na.rm = TRUE
404		),
405	5x	sd_obs = vec_sd(x = .data[[obs]], term = term, na.rm = TRUE),
406	5x	sd_mod = vec_sd(x = .data[[mod]], term = term, na.rm = TRUE),
407	5x	crmse = vec_crmse(
408	5x	obs = .data[[obs]],
409	5x	mod = .data[[mod]],
410	5x	na.rm = TRUE
411		),
412	5x	mqi = vec_mqi2(
413	5x	rmsu = .data$rmsu,
414	5x	err = abs(.data$rmse),
415	5x	b = params$b
416		),
417	5x	ti_bias = vec_pi_bias2(
418	5x	bias = .data$bias,
419	5x	rmsu = .data$rmsu,
420	5x	b = params$b
421		),
422	5x	ti_cor = vec_pi_cor2(
423	5x	cor = .data$r,
424	5x	sd_o = .data$sd_obs,
425	5x	sd_m = .data$sd_mod,
426	5x	rmsu = .data$rmsu,
427	5x	b = params$b
428		),
429	5x	ti_sd = vec_pi_sd2(
430	5x	sd_o = .data$sd_obs,
431	5x	sd_m = .data$sd_mod,
432	5x	rmsu = .data$rmsu,
433	5x	b = params$b
434		),
435	5x	ti_crmse = vec_ti_crmse2(
436	5x	mqi = .data$mqi,
437	5x	tib = .data$ti_bias
438		),
439	5x	.by = dplyr::all_of(c(site, type))
440		) \|>
441	5x	dplyr::mutate(
442	5x	rmsu_star = .data$rmsu * sqrt(1 + params$b^2),
443	5x	.after = "rmsu"
444		)
445		}
446
447		#' @noRd
448		calc_longterm_site_stats <- function(
449		data,
450		term,
451		params,
452		obs,
453		mod,
454		site,
455		type
456		) {
457	5x	dplyr::summarise(
458	5x	data,
459	5x	obs = .data[[obs]],
460	5x	mod = .data[[mod]],
461	5x	uncer = scalar_uncer(
462	5x	obs = .data[[obs]],
463	5x	term = term,
464	5x	params = params,
465	5x	na.rm = TRUE
466		),
467	5x	ar_low = .data[[obs]] - sqrt(1 + params$b^2) * .data$uncer,
468	5x	ar_up = .data[[obs]] + sqrt(1 + params$b^2) * .data$uncer,
469	5x	mqi = vec_mqi(
470	5x	obs = .data[[obs]],
471	5x	mod = .data[[mod]],
472	5x	term = term,
473	5x	params = params,
474	5x	na.rm = TRUE
475		),
476	5x	.by = dplyr::all_of(c(site, type))
477		)
478		}
479
480		#' @noRd
481		calc_longterm_type_stats <- function(
482		data,
483		term,
484		params,
485		obs,
486		mod,
487		site,
488		type
489		) {
490	5x	dplyr::summarise(
491	5x	data,
492	5x	si_rmse = vec_si_rmse(
493	5x	obs = .data[[obs]],
494	5x	mod = .data[[mod]],
495	5x	term = term,
496	5x	params = params,
497	5x	na.rm = TRUE
498		),
499	5x	si_bias = vec_pi_bias(
500	5x	obs = .data[[obs]],
501	5x	mod = .data[[mod]],
502	5x	term = term,
503	5x	params = params,
504	5x	na.rm = TRUE
505		),
506	5x	si_cor = vec_pi_cor(
507	5x	obs = .data[[obs]],
508	5x	mod = .data[[mod]],
509	5x	term = term,
510	5x	params = params,
511	5x	na.rm = TRUE
512		),
513	5x	si_sd = vec_pi_sd(
514	5x	obs = .data[[obs]],
515	5x	mod = .data[[mod]],
516	5x	term = term,
517	5x	params = params,
518	5x	na.rm = TRUE
519		),
520	5x	.by = dplyr::all_of(type)
521		)
522		}

1		#' Statistical performance indicators
2		#'
3		#' These functions are provided for convenient calculation of modelling the
4		#' listed model quality objectives performance indicators. All functions work on
5		#' numeric R vectors, containing modelled or measured concentrations at a single
6		#' sampling point, or averaged modelled or measured concentrations across
7		#' multiple sampling points.
8		#' - Bias ([vec_bias()])
9		#' - Root Mean Square Error ([vec_rmse()])
10		#' - Maximum Accepted Measurement Uncertainty ([scalar_uncer()])
11		#' - Root Mean Square Uncertainty ([vec_rmsu()])
12		#' - Pearson's Correlation coefficient ([vec_cor()])
13		#' - Standard Deviation ([vec_sd()])
14		#' - Centred Root Mean Square Error ([vec_crmse()])
15		#' - MQI ([vec_mqi()])
16		#'
17		#' @param obs,mod,x A numeric vector of observed (measured) concentrations
18		#' (`obs`), modelled concentrations (`mod`), or either of the previous
19		#' depending on desired output (`x`).
20		#' @param term Either `"short"` or `"long"`, identifying whether `data`
21		#' represents Short- or Long-term data.
22		#' @param params A set of `mqor` parameters, most simply constructed using
23		#' [mqo_params()]. See [mqo_params()] for more information about how
24		#' parameter sets can be constructed.
25		#' @inheritParams base::mean
26		#'
27		#' @family model performance indicators
28		#'
29		#' @returns All `vec_` functions return a single numeric value. `scalar_`
30		#' functions return a numeric vector the same length as `obs` or `mod`.
31		#'
32		#' @rdname statistical-performance-indicators
33		#' @order 1
34		#' @export
35		vec_bias <- function(obs, mod, na.rm = FALSE) {
36	100x	validate_equal(obs, mod)
37	100x	mean(mod, na.rm = na.rm) - mean(obs, na.rm = na.rm)
38		}
39
40		#' @rdname statistical-performance-indicators
41		#' @order 2
42		#' @export
43		vec_rmse <- function(obs, mod, na.rm = FALSE) {
44	55x	validate_equal(obs, mod)
45	55x	summed <- sum((obs - mod)^2, na.rm = na.rm)
46	55x	len <- length(obs)
47	55x	sqrt(summed / len)
48		}
49
50		#' @rdname statistical-performance-indicators
51		#' @order 3
52		#' @export
53		scalar_uncer <- function(obs, term, params = mqo_params(), na.rm = FALSE) {
54	322x	term <- validate_term(term)
55	322x	params$u_rv * sqrt((1 - params$a^2) * obs^2 + (params$a^2 * params$rv^2))
56		}
57
58		#' @rdname statistical-performance-indicators
59		#' @order 4
60		#' @export
61		vec_rmsu <- function(obs, term, params = mqo_params(), na.rm = TRUE) {
62	272x	term <- validate_term(term)
63	272x	uncer <- scalar_uncer(obs = obs, term = term, params = params, na.rm = na.rm)
64	272x	vec_rmsu_uncer(uncer, na.rm)
65		}
66
67		#' Non-exported version of vec_rmsu that takes an uncertainty vector
68		#' Needed for mixed type networks
69		#' @noRd
70		vec_rmsu_uncer <- function(uncer, na.rm) {
71	275x	len <- length(uncer)
72	275x	sqrt(sum(uncer^2, na.rm = na.rm) / len)
73		}
74
75		#' @rdname statistical-performance-indicators
76		#' @order 5
77		#' @export
78		vec_cor <- function(obs, mod, na.rm = FALSE) {
79	55x	validate_equal(obs, mod)
80
81	55x	m_diff <- mod - mean(mod, na.rm = na.rm)
82	55x	o_diff <- obs - mean(obs, na.rm = na.rm)
83	55x	top <- sum(m_diff * o_diff, na.rm = na.rm)
84
85	55x	m_bottom <- sqrt(sum((mod - mean(mod, na.rm = na.rm))^2, na.rm = na.rm))
86	55x	o_bottom <- sqrt(sum((obs - mean(obs, na.rm = na.rm))^2, na.rm = na.rm))
87
88	55x	top / (m_bottom * o_bottom)
89		}
90
91		#' @rdname statistical-performance-indicators
92		#' @order 6
93		#' @export
94		vec_sd <- function(x, term, na.rm = FALSE) {
95	120x	term <- validate_term(term)
96	120x	summed <- sum((x - mean(x, na.rm = na.rm))^2, na.rm = na.rm)
97	120x	len <- length(x)
98	120x	if (term == "long") {
99	26x	len <- len - 1L
100		}
101	120x	sqrt(summed / len)
102		}
103
104		#' @rdname statistical-performance-indicators
105		#' @order 7
106		#' @export
107		vec_crmse <- function(obs, mod, na.rm = FALSE) {
108	47x	validate_equal(obs, mod)
109	47x	m_diff <- mod - mean(mod, na.rm = na.rm)
110	47x	o_diff <- obs - mean(obs, na.rm = na.rm)
111	47x	diffs <- m_diff - o_diff
112	47x	diffs_summed <- sum(diffs^2, na.rm = na.rm)
113	47x	len <- length(obs)
114	47x	sqrt(diffs_summed / len)
115		}
116
117		#' @rdname statistical-performance-indicators
118		#' @order 8
119		#' @export
120		vec_mqi <- function(obs, mod, term, params = mqo_params(), na.rm = FALSE) {
121	45x	validate_equal(obs, mod)
122	45x	term <- validate_term(term)
123
124	45x	rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)
125
126	45x	if (term == "short") {
127	!	top <- vec_rmse(obs = obs, mod = mod, na.rm = na.rm)
128		} else {
129	45x	top <- abs(vec_bias(
130	45x	obs = obs,
131	45x	mod = mod,
132	45x	na.rm = na.rm
133		))
134		}
135
136	45x	fct <- sqrt(1 + params$b^2)
137	45x	top / (fct * rmsu)
138		}
139
140		#' Calculate the MQI from existing RMSU & error
141		#' @noRd
142		vec_mqi2 <- function(rmsu, err, b) {
143	47x	fct <- sqrt(1 + b^2)
144	47x	err / (fct * rmsu)
145		}
146
147		#' Complementary performance indicators
148		#'
149		#' These functions complement the MQI/MQO procedure outlined using [vec_mqi()],
150		#' and are used in internally by [plot_mqi_scatter()].
151		#'
152		#' The following indicators are available for both long-term and short-term
153		#' data:
154		#' - Bias performance indicator ([vec_pi_bias()])
155		#' - Correlation performance indicator ([vec_pi_cor()])
156		#' - Standard deviation performance indicator ([vec_pi_sd()])
157		#'
158		#' The following 'temporal' (short-term) indicators are available:
159		#' - Centred Root Mean Square Error temporal indicator ([vec_ti_crmse()])
160		#'
161		#' The following 'spatial' (long-term) indicators are available:
162		#' - Root Mean Square Error spatial indicator ([vec_si_rmse()])
163		#'
164		#' @param obs,mod A numeric vector of observed (measured) concentrations (`obs`)
165		#' or modelled concentrations (`mod`).
166		#'
167		#' @inheritParams vec_bias
168		#'
169		#' @family model performance indicators
170		#'
171		#' @rdname complementary-performance-indicators
172		#' @order 1
173		#' @export
174		vec_pi_bias <- function(obs, mod, term, params, na.rm = FALSE) {
175	5x	validate_equal(obs, mod)
176	5x	term <- validate_term(term)
177
178	5x	bias <- vec_bias(obs = obs, mod = mod, na.rm = na.rm)
179
180	5x	rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)
181
182	5x	beta <- sqrt(1 + params$b^2)
183
184	5x	bias / (beta * rmsu)
185		}
186
187		#' @noRd
188		vec_pi_bias2 <- function(bias, rmsu, b) {
189	47x	bias / (sqrt(1 + b^2) * rmsu)
190		}
191
192		#' @rdname complementary-performance-indicators
193		#' @order 2
194		#' @export
195		vec_pi_cor <- function(obs, mod, term, params, na.rm = FALSE) {
196	5x	validate_equal(obs, mod)
197	5x	term <- validate_term(term)
198	5x	R <- vec_cor(obs, mod, na.rm = na.rm)
199	5x	sd_o <- vec_sd(obs, term, na.rm = na.rm)
200	5x	sd_m <- vec_sd(mod, term, na.rm = na.rm)
201	5x	rmsu2 <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)^2
202	5x	beta <- 1 + params$b^2
203	5x	(1 - R) * ((2 * sd_o * sd_m) / (beta * rmsu2))
204		}
205
206		#' @noRd
207		vec_pi_cor2 <- function(cor, sd_o, sd_m, rmsu, b) {
208	47x	beta <- 1 + b^2
209	47x	rmsu2 <- rmsu^2
210	47x	(1 - cor) * ((2 * sd_o * sd_m) / (beta * rmsu2))
211		}
212
213		#' @rdname complementary-performance-indicators
214		#' @order 3
215		#' @export
216		vec_pi_sd <- function(obs, mod, term, params, na.rm = FALSE) {
217	5x	validate_equal(obs, mod)
218	5x	term <- validate_term(term)
219	5x	sd_o <- vec_sd(obs, term, na.rm = na.rm)
220	5x	sd_m <- vec_sd(mod, term, na.rm = na.rm)
221	5x	rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)
222	5x	beta <- sqrt(1 + params$b^2)
223	5x	(sd_m - sd_o) / (beta * rmsu)
224		}
225
226		#' @noRd
227		vec_pi_sd2 <- function(sd_o, sd_m, rmsu, b) {
228	47x	beta <- sqrt(1 + b^2)
229	47x	(sd_m - sd_o) / (beta * rmsu)
230		}
231
232		#' @rdname complementary-performance-indicators
233		#' @order 4
234		#' @export
235		vec_ti_crmse <- function(obs, mod, term, params, na.rm = FALSE) {
236	!	validate_equal(obs, mod)
237	!	term <- validate_term(term)
238	!	if (term == "long") {
239	!	cli::cli_abort(
240	!	"This performance indicator is not supported for long-term data."
241		)
242		}
243
244	!	mqi <- vec_mqi(
245	!	obs = obs,
246	!	mod = mod,
247	!	term = term,
248	!	params = params,
249	!	na.rm = na.rm
250		)
251
252	!	tib <- vec_pi_bias(
253	!	obs = obs,
254	!	mod = mod,
255	!	term = term,
256	!	params = params,
257	!	na.rm = na.rm
258		)
259
260	!	sqrt(mqi^2 - tib^2)
261		}
262
263		#' @noRd
264		vec_ti_crmse2 <- function(mqi, tib) {
265	47x	sqrt(mqi^2 - tib^2)
266		}
267
268		#' @rdname complementary-performance-indicators
269		#' @order 5
270		#' @export
271		vec_si_rmse <- function(obs, mod, term, params, na.rm = FALSE) {
272	5x	validate_equal(obs, mod)
273	5x	term <- validate_term(term)
274
275	5x	if (term == "short") {
276	!	cli::cli_abort(
277	!	"This performance indicator is not supported for short-term data."
278		)
279		}
280
281	5x	rmse <- vec_rmse(obs = obs, mod = mod, na.rm = na.rm)
282
283	5x	rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)
284
285	5x	beta <- sqrt(1 + params$b^2)
286
287	5x	rmse / (beta * rmsu)
288		}
289
290		#' Function to ensure vectors are of equal length
291		#' @noRd
292		validate_equal <- function(obs, mod) {
293	323x	if (length(mod) != length(obs)) {
294	1x	cli::cli_abort(
295	1x	"The length of observed values, obs, must be equal to the length of the modelled values, mod."
296		)
297		}
298		}
299
300		#' Check term is appropriate
301		#' @noRd
302		validate_term <- function(x) {
303	781x	rlang::arg_match(x, c("short", "long"))
304		}
305
306		#' Check 'type' col only has either 'fixed' or 'indicative'
307		#' @noRd
308		validate_type <- function(x) {
309	1x	if (any(!x %in% c("fixed", "indicative"))) {
310	1x	cli::cli_abort(
311	1x	"The {.field type} column should only contain either 'fixed' or 'indicative'."
312		)
313		}
314		}

1		#' Plot a scatter chart visualising the Modelling Quality Indicator (MQI)
2		#'
3		#' This function produces a plot comparing modelled and measured concentrations.
4		#' For long-term data, a traditional scatter plot of observed vs modelled
5		#' concentrations is plotted with a shaded area showing the acceptability range
6		#' and dashed boundaries showing the maximum accepted measurement uncertainty.
7		#' For short-term data, a 'bullseye' scatter plot is plotted showing two
8		#' complementary performance indicators with a shaded area showing the MQO
9		#' fulfilment area and a dashed boundary showing the maximum accepted
10		#' measurement uncertainty.
11		#'
12		#' @inheritParams plot_comparison_bars
13		#'
14		#' @param stats_shortterm,stats_longterm The output of [summarise_mqo_stats()].
15		#' All relevant information (e.g., `term`, `params_fixed`, etc.) will be
16		#' passed to this function. `stats_shortterm` should contain short-term
17		#' (temporal) statistics, `stats_longterm` should contain long-term (spatial)
18		#' statistics. Either of these can be `NULL`, but at least one should be
19		#' provided.
20		#'
21		#' @param show_dashed Should extra dashed lines be shown on the figures? These
22		#' show the maximum accepted measurement uncertainty, but can overcrowd the
23		#' plotting area.
24		#'
25		#' @param color_fixed,color_indicative,color_outline The colours to use for
26		#' fixed data, indicative data, and any another annotations. Can be expressed
27		#' as hex codes, or any colours listed in [colors()].
28		#'
29		#' @param gp A [grid::gpar()] object passed to [grid::textGrob()], used to
30		#' control the appearance of the parameter & indicator annotations. `fontsize`
31		#' is likely the most useful argument, but many options are available through
32		#' [grid::gpar()]. Only applies when `interactive = FALSE`.
33		#'
34		#' @family plotting functions
35		#'
36		#' @author Jack Davison
37		#'
38		#' @examples
39		#' long <- summarise_mqo_stats(demo_longterm, pollutant = "PM10")
40		#'
41		#' short <- summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
42		#'
43		#' plot_mqi_scatter(stats_shortterm = short)
44		#'
45		#' plot_mqi_scatter(stats_shortterm = short, stats_longterm = long)
46		#'
47		#' plot_mqi_scatter(stats_longterm = long)
48		#'
49		#' @export
50		plot_mqi_scatter <- function(
51		stats_shortterm = NULL,
52		stats_longterm = NULL,
53		show_dashed = FALSE,
54		color_fixed = "#4269D0",
55		color_indicative = "#EFB118",
56		color_outline = "black",
57		interactive = FALSE,
58		gp = grid::gpar(fontsize = 10)
59		) {
60	6x	validate_mqor_stats(stats_shortterm)
61	6x	validate_mqor_stats(stats_longterm)
62	6x	if (!is.null(stats_shortterm)) {
63	4x	term <- "short"
64	4x	params_fixed <- stats_shortterm$inputs$params_fixed
65	4x	params_indicative <- stats_shortterm$inputs$params_indicative
66
67	4x	if (!"fixed" %in% stats_shortterm$by_site$type) {
68	!	params_fixed <- NULL
69		}
70	4x	if (!"indicative" %in% stats_shortterm$by_site$type) {
71	2x	params_indicative <- NULL
72		}
73
74	4x	uncer <-
75	4x	dplyr::bind_rows(
76	4x	"fixed" = make_circle(1 / sqrt(1 + params_fixed$b^2)),
77	4x	"indicative" = make_circle(1 / sqrt(1 + params_indicative$b^2)),
78	4x	.id = "type"
79		)
80
81	4x	res <- paste0(
82	4x	round(stats_shortterm$summary$mqi_90, 2),
83		" (",
84	4x	ifelse(stats_shortterm$summary$mqo, "Pass", "Fail"),
85		")"
86		)
87
88	4x	if (!is.null(stats_longterm)) {
89	2x	res2 <- paste0(
90	2x	round(stats_longterm$summary$mqi_90, 2),
91		" (",
92	2x	ifelse(stats_longterm$summary$mqo, "Pass", "Fail"),
93		")"
94		)
95		}
96
97	4x	if (interactive) {
98	2x	if (!is.null(stats_longterm)) {
99	1x	html <-
100	1x	glue::glue(
101		"
102	1x	<b>MQI<sub>{term},90th</sub> = {res}</b><br>
103	1x	<b>MQI<sub>long,90th</sub> = {res2}</b><br>
104	1x	α<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br>
105	1x	β<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br>
106	1x	RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br>
107	1x	U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>
108		"
109		)
110		} else {
111	1x	html <-
112	1x	glue::glue(
113	1x	"<b>MQI<sub>{term},90th</sub> = {res}</b><br>
114	1x	α<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br>
115	1x	β<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br>
116	1x	RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br>
117	1x	U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>"
118		)
119		}
120
121	2x	html2 <-
122	2x	glue::glue(
123		'
124	2x	TI<sub>B</sub> = {fmt_pi(stats_shortterm$summary, "ti_bias_90")}<br>
125	2x	TI<sub>R</sub> = {fmt_pi(stats_shortterm$summary, "ti_cor_90")}<br>
126	2x	TI<sub>SD</sub> = {fmt_pi(stats_shortterm$summary, "ti_sd_90")}<br>
127		'
128		)
129
130		# get outline color RGB - for the highlight area
131	2x	color_outline_rgb <- grDevices::col2rgb(col = color_outline)
132
133	2x	plt <-
134	2x	plotly::plot_ly(
135	2x	colors = c(
136	2x	"fixed" = color_fixed,
137	2x	"indicative" = color_indicative
138		)
139		) \|>
140	2x	plotly::add_ribbons(
141	2x	data = make_circle(1),
142	2x	x = ~x,
143	2x	ymin = ~ymin,
144	2x	ymax = ~ymax,
145	2x	color = I(color_outline),
146	2x	fillcolor = grDevices::rgb(
147	2x	red = color_outline_rgb[["red", 1]],
148	2x	green = color_outline_rgb[["green", 1]],
149	2x	blue = color_outline_rgb[["blue", 1]],
150	2x	maxColorValue = 255,
151	2x	alpha = 255 * 0.05
152		),
153	2x	showlegend = FALSE,
154	2x	hoverinfo = "skip"
155		)
156
157	2x	if (show_dashed) {
158	!	plt <-
159	!	plotly::add_ribbons(
160	!	p = plt,
161	!	data = uncer,
162	!	x = ~x,
163	!	ymin = ~ymin,
164	!	ymax = ~ymax,
165	!	color = ~type,
166	!	line = list(dash = "dot"),
167	!	fillcolor = grDevices::rgb(0, 0, 0, 0),
168	!	legendgroup = ~type,
169	!	showlegend = FALSE
170		)
171		}
172
173	2x	plt <-
174	2x	plt \|>
175	2x	plotly::add_markers(
176	2x	x = stats_shortterm$by_site$ti_crmse,
177	2x	y = stats_shortterm$by_site$ti_bias,
178	2x	color = stats_shortterm$by_site$type,
179	2x	text = stats_shortterm$by_site$site,
180	2x	legendgroup = stats_shortterm$by_site$type
181		) \|>
182	2x	plotly::layout(
183	2x	xaxis = list(
184	2x	range = range(c(-2, 2)),
185	2x	constrain = "domain",
186	2x	title = "TI<sub>CRMSE</sub>"
187		),
188	2x	yaxis = list(
189	2x	range = range(c(-2, 2)),
190	2x	constrain = "domain",
191	2x	scaleanchor = "x",
192	2x	scaleratio = 1,
193	2x	title = "TI<sub>BIAS</sub>"
194		),
195	2x	legend = list(
196	2x	orientation = "h",
197	2x	y = 1.1
198		)
199		) \|>
200	2x	plotly::add_annotations(
201	2x	text = html,
202	2x	y = 0.8,
203	2x	x = 0.25,
204	2x	yref = "y domain",
205	2x	xref = "x domain",
206	2x	align = "left",
207	2x	valign = "top"
208		) \|>
209	2x	plotly::add_annotations(
210	2x	text = html2,
211	2x	y = 0.1,
212	2x	x = 0.9,
213	2x	yref = "y domain",
214	2x	xref = "x domain",
215	2x	align = "right",
216	2x	valign = "bottom"
217		)
218		} else {
219	2x	sub <- paste(term, "90th", sep = ",")
220
221	2x	grob_mqi <- grid::textGrob(bquote(MQI[.(sub)] == .(res)), gp = gp)
222
223	2x	if (!is.null(stats_longterm)) {
224	1x	sub2 <- paste("long", "90th", sep = ",")
225	1x	grob_mqi2 <- grid::textGrob(bquote(MQI[.(sub2)] == .(res2)), gp = gp)
226		} else {
227	1x	grob_mqi2 <- grid::textGrob("")
228		}
229
230	2x	grob_a <- grid::textGrob(
231	2x	bquote(
232	2x	alpha[.(term)] ==
233	2x	.(combine(
234	2x	params_fixed$a,
235	2x	params_indicative$a
236		))
237		),
238	2x	gp = gp
239		)
240	2x	grob_b <- grid::textGrob(
241	2x	bquote(
242	2x	beta[.(term)] == .(combine(params_fixed$b, params_indicative$b))
243		),
244	2x	gp = gp
245		)
246	2x	grob_rv <- grid::textGrob(
247	2x	bquote(
248	2x	RV[.(term)] == .(combine(params_fixed$rv, params_indicative$rv))
249		),
250	2x	gp = gp
251		)
252	2x	grob_urv <- grid::textGrob(
253	2x	bquote(
254	2x	U[Or](RV[.(term)]) ==
255	2x	.(combine(params_fixed$u_rv, params_indicative$u_rv, TRUE))
256		),
257	2x	gp = gp
258		)
259
260	2x	grob_tib <- grid::textGrob(
261	2x	bquote(TI[B] == .(fmt_pi(stats_shortterm$summary, "ti_bias_90"))),
262	2x	gp = gp
263		)
264	2x	grob_tir <- grid::textGrob(
265	2x	bquote(TI[R] == .(fmt_pi(stats_shortterm$summary, "ti_cor_90"))),
266	2x	gp = gp
267		)
268	2x	grob_tis <- grid::textGrob(
269	2x	bquote(TI[SD] == .(fmt_pi(stats_shortterm$summary, "ti_sd_90"))),
270	2x	gp = gp
271		)
272
273	2x	plt <-
274	2x	ggplot2::ggplot(stats_shortterm$by_site) +
275	2x	ggplot2::geom_ribbon(
276	2x	data = make_circle(1),
277	2x	ggplot2::aes(
278	2x	x = .data$x,
279	2x	ymin = .data$ymin,
280	2x	ymax = .data$ymax
281		),
282	2x	fill = color_outline,
283	2x	color = color_outline,
284	2x	alpha = 0.05
285		)
286
287	2x	if (show_dashed) {
288	!	plt <-
289	!	plt +
290	!	ggplot2::geom_ribbon(
291	!	data = uncer,
292	!	ggplot2::aes(
293	!	x = .data$x,
294	!	ymin = .data$ymin,
295	!	ymax = .data$ymax,
296	!	color = .data$type
297		),
298	!	fill = NA,
299	!	lty = 2,
300	!	show.legend = dplyr::n_distinct(stats_shortterm$by_site$type) > 1L
301		)
302		}
303
304	2x	plt <- plt +
305	2x	ggplot2::geom_hline(yintercept = 0) +
306	2x	ggplot2::geom_vline(xintercept = 0) +
307	2x	ggplot2::geom_point(
308	2x	ggplot2::aes(
309	2x	x = .data$ti_crmse,
310	2x	y = .data$ti_bias,
311	2x	color = .data$type
312		),
313	2x	show.legend = dplyr::n_distinct(stats_shortterm$by_site$type) > 1L
314		) +
315	2x	ggplot2::coord_equal() +
316	2x	ggplot2::scale_x_continuous(
317	2x	expand = ggplot2::expansion(),
318	2x	limits = c(-2, 2)
319		) +
320	2x	ggplot2::scale_y_continuous(
321	2x	expand = ggplot2::expansion(),
322	2x	limits = c(-2, 2)
323		) +
324	2x	ggplot2::theme_bw(14) +
325	2x	ggplot2::labs(
326	2x	x = expression(TI[CRMSE]),
327	2x	y = expression(TI[BIAS]),
328	2x	color = NULL
329		) +
330	2x	ggplot2::theme(
331	2x	legend.justification = "left",
332	2x	legend.spacing = ggplot2::unit(0.2, "cm"),
333	2x	legend.margin = ggplot2::margin(0, 0, 0, 0, unit = "cm"),
334	2x	legend.title = ggplot2::element_text(face = "bold")
335		) +
336	2x	ggplot2::scale_color_manual(
337	2x	values = c(
338	2x	"fixed" = color_fixed,
339	2x	"indicative" = color_indicative
340		),
341	2x	aesthetics = c("fill", "colour")
342		) +
343	2x	ggplot2::guides(
344	2x	fill = ggplot2::guide_legend(position = "top"),
345	2x	custom_mqi = ggplot2::guide_custom(grob_mqi, order = 1),
346	2x	custom_mqi2 = ggplot2::guide_custom(grob_mqi2, order = 2),
347	2x	custom_a = ggplot2::guide_custom(
348	2x	grob_a,
349	2x	order = 3,
350	2x	title = "\nUncertainty\nParameters"
351		),
352	2x	custom_b = ggplot2::guide_custom(grob_b, order = 4),
353	2x	custom_rv = ggplot2::guide_custom(grob_rv, order = 5),
354	2x	custom_urv = ggplot2::guide_custom(grob_urv, order = 6),
355	2x	custom_tib = ggplot2::guide_custom(
356	2x	grob_tib,
357	2x	order = 7,
358	2x	title = "\nComplementary\nIndicators"
359		),
360	2x	custom_tir = ggplot2::guide_custom(grob_tir, order = 8),
361	2x	custom_tis = ggplot2::guide_custom(grob_tis, order = 9)
362		)
363		}
364	2x	} else if (!is.null(stats_longterm)) {
365	2x	term <- "long"
366	2x	params_fixed <- stats_longterm$inputs$params_fixed
367	2x	params_indicative <- stats_longterm$inputs$params_indicative
368
369	2x	max_om <- max(
370	2x	c(stats_longterm$by_site$obs, stats_longterm$by_site$mod),
371	2x	na.rm = TRUE
372		)
373
374	2x	build_uncer_bands <- function(params) {
375	4x	if (is.null(params)) {
376	2x	return(data.frame(
377	2x	obs = numeric(),
378	2x	u = numeric(),
379	2x	ar = numeric()
380		))
381		}
382	2x	range_o <- pretty(0:max_om)
383	2x	diff_o <- diff(range_o[1:2])
384
385	2x	uncer <-
386	2x	data.frame(obs = pretty(n = 100, (0 - diff_o):(max_om + diff_o))) \|>
387	2x	dplyr::rowwise() \|>
388	2x	dplyr::mutate(
389	2x	u = vec_rmsu(
390	2x	.data$obs,
391	2x	term = term,
392	2x	params = params,
393	2x	na.rm = TRUE
394		),
395	2x	ar = sqrt(1 + params$b^2) * .data$u
396		) \|>
397	2x	dplyr::ungroup()
398
399	2x	return(uncer)
400		}
401
402	2x	if (!"fixed" %in% stats_longterm$by_site$type) {
403	!	params_fixed <- NULL
404		}
405	2x	if (!"indicative" %in% stats_longterm$by_site$type) {
406	2x	params_indicative <- NULL
407		}
408
409	2x	uncer <-
410	2x	dplyr::bind_rows(
411	2x	"fixed" = build_uncer_bands(params_fixed),
412	2x	"indicative" = build_uncer_bands(params_indicative),
413	2x	.id = "type"
414		)
415
416	2x	res <- paste0(
417	2x	round(stats_longterm$summary$mqi_90, 2),
418		" (",
419	2x	ifelse(stats_longterm$summary$mqo, "Pass", "Fail"),
420		")"
421		)
422
423	2x	if (interactive) {
424	1x	html <-
425	1x	glue::glue(
426	1x	"<b>MQI<sub>{term},90th</sub> = {res}</b><br>
427	1x	α<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br>
428	1x	β<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br>
429	1x	RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br>
430	1x	U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>"
431		)
432
433	1x	html2 <-
434	1x	glue::glue(
435		'
436	1x	SI<sub>RMSE</sub> = {fmt_pi(stats_longterm$by_type, "si_rmse")}<br>
437	1x	SI<sub>B</sub> = {fmt_pi(stats_longterm$by_type, "si_bias")}<br>
438	1x	SI<sub>R</sub> = {fmt_pi(stats_longterm$by_type, "si_cor")}<br>
439	1x	SI<sub>SD</sub> = {fmt_pi(stats_longterm$by_type, "si_sd")}<br>
440		'
441		)
442
443	1x	plt <-
444	1x	plotly::plot_ly(
445	1x	colors = c(
446	1x	"fixed" = color_fixed,
447	1x	"indicative" = color_indicative
448		)
449		) \|>
450	1x	plotly::add_lines(
451	1x	x = ceiling(c(0, max_om)),
452	1x	y = ceiling(c(0, max_om)),
453	1x	color = I(color_outline),
454	1x	hoverinfo = "skip",
455	1x	showlegend = FALSE
456		) \|>
457	1x	plotly::add_ribbons(
458	1x	data = uncer,
459	1x	x = ~obs,
460	1x	ymin = ~ obs - ar,
461	1x	ymax = ~ obs + ar,
462	1x	color = ~type,
463	1x	legendgroup = ~type,
464	1x	opacity = 0.5
465		)
466
467	1x	if (show_dashed) {
468	!	plt <-
469	!	plotly::add_ribbons(
470	!	p = plt,
471	!	data = uncer,
472	!	x = ~obs,
473	!	ymin = ~ obs - u,
474	!	ymax = ~ obs + u,
475	!	color = ~type,
476	!	line = list(dash = "dot"),
477	!	fillcolor = grDevices::rgb(0, 0, 0, 0),
478	!	legendgroup = ~type,
479	!	opacity = 0.5,
480	!	showlegend = FALSE
481		)
482		}
483
484	1x	plt <- plt \|>
485	1x	plotly::add_markers(
486	1x	x = stats_longterm$by_site$obs,
487	1x	y = stats_longterm$by_site$mod,
488	1x	color = stats_longterm$by_site$type,
489	1x	text = stats_longterm$by_site$site
490		) \|>
491	1x	plotly::layout(
492	1x	xaxis = list(
493	1x	range = range(pretty(c(0, max_om))),
494	1x	constrain = "domain",
495	1x	title = "observed concentrations (μg m<sup>-3</sup>)"
496		),
497	1x	yaxis = list(
498	1x	range = range(pretty(c(0, max_om))),
499	1x	constrain = "domain",
500	1x	scaleanchor = "x",
501	1x	scaleratio = 1,
502	1x	title = "modelled concentrations (μg m<sup>-3</sup>)"
503		),
504	1x	legend = list(
505	1x	orientation = "h",
506	1x	y = 1.1
507		)
508		) \|>
509	1x	plotly::add_annotations(
510	1x	text = html,
511	1x	y = 0.8,
512	1x	x = 0.25,
513	1x	yref = "y domain",
514	1x	xref = "x domain",
515	1x	align = "left",
516	1x	valign = "top"
517		) \|>
518	1x	plotly::add_annotations(
519	1x	text = html2,
520	1x	y = 0.1,
521	1x	x = 0.9,
522	1x	yref = "y domain",
523	1x	xref = "x domain",
524	1x	align = "right",
525	1x	valign = "bottom"
526		)
527		} else {
528	1x	sub <- paste(term, "90th", sep = ",")
529
530	1x	grob_mqi <- grid::textGrob(bquote(MQI[.(sub)] == .(res)), gp = gp)
531
532	1x	grob_a <- grid::textGrob(
533	1x	bquote(
534	1x	alpha[.(term)] == .(combine(params_fixed$a, params_indicative$a))
535		),
536	1x	gp = gp
537		)
538	1x	grob_b <- grid::textGrob(
539	1x	bquote(
540	1x	beta[.(term)] == .(combine(params_fixed$b, params_indicative$b))
541		),
542	1x	gp = gp
543		)
544	1x	grob_rv <- grid::textGrob(
545	1x	bquote(
546	1x	RV[.(term)] == .(combine(params_fixed$rv, params_indicative$rv))
547		),
548	1x	gp = gp
549		)
550	1x	grob_urv <- grid::textGrob(
551	1x	bquote(
552	1x	U[Or](RV[.(term)]) ==
553	1x	.(combine(params_fixed$u_rv, params_indicative$u_rv, TRUE))
554		),
555	1x	gp = gp
556		)
557
558	1x	grob_siu <- grid::textGrob(
559	1x	bquote(SI[RMSE] == .(fmt_pi(stats_longterm$by_type, "si_rmse"))),
560	1x	gp = gp
561		)
562	1x	grob_sib <- grid::textGrob(
563	1x	bquote(SI[B] == .(fmt_pi(stats_longterm$by_type, "si_bias"))),
564	1x	gp = gp
565		)
566	1x	grob_sir <- grid::textGrob(
567	1x	bquote(SI[R] == .(fmt_pi(stats_longterm$by_type, "si_cor"))),
568	1x	gp = gp
569		)
570	1x	grob_sis <- grid::textGrob(
571	1x	bquote(SI[SD] == .(fmt_pi(stats_longterm$by_type, "si_sd"))),
572	1x	gp = gp
573		)
574
575	1x	plt <-
576	1x	stats_longterm$by_site \|>
577	1x	ggplot2::ggplot(ggplot2::aes(
578	1x	x = .data$obs,
579	1x	y = .data$mod
580		)) +
581	1x	ggplot2::coord_equal(xlim = c(0, max_om), ylim = c(0, max_om)) +
582	1x	ggplot2::geom_ribbon(
583	1x	data = uncer,
584	1x	ggplot2::aes(
585	1x	ymin = .data$obs - .data$ar,
586	1x	ymax = .data$obs + .data$ar,
587	1x	x = .data$obs,
588	1x	fill = .data$type
589		),
590	1x	inherit.aes = F,
591	1x	show.legend = dplyr::n_distinct(stats_longterm$by_site$type) > 1L,
592	1x	alpha = 0.15
593		) +
594	1x	ggplot2::geom_abline(color = color_outline) +
595	1x	ggplot2::geom_line(
596	1x	data = uncer,
597	1x	ggplot2::aes(
598	1x	x = .data$obs,
599	1x	y = .data$obs + .data$ar,
600	1x	color = .data$type
601		),
602	1x	show.legend = FALSE
603		) +
604	1x	ggplot2::geom_line(
605	1x	data = uncer,
606	1x	ggplot2::aes(
607	1x	x = .data$obs,
608	1x	y = .data$obs - .data$ar,
609	1x	color = .data$type
610		),
611	1x	show.legend = FALSE
612		)
613
614	1x	if (show_dashed) {
615	!	plt <-
616	!	plt +
617	!	ggplot2::geom_line(
618	!	data = uncer,
619	!	ggplot2::aes(
620	!	x = .data$obs,
621	!	y = .data$obs + .data$u,
622	!	color = .data$type
623		),
624	!	show.legend = FALSE,
625	!	lty = 2
626		) +
627	!	ggplot2::geom_line(
628	!	data = uncer,
629	!	ggplot2::aes(
630	!	x = .data$obs,
631	!	y = .data$obs - .data$u,
632	!	color = .data$type
633		),
634	!	show.legend = FALSE,
635	!	lty = 2
636		)
637		}
638
639	1x	plt <-
640	1x	plt +
641	1x	ggplot2::geom_point(
642	1x	size = 3,
643	1x	ggplot2::aes(color = .data$type),
644	1x	show.legend = FALSE
645		) +
646	1x	ggplot2::scale_x_continuous(expand = ggplot2::expansion(c(0, .1))) +
647	1x	ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) +
648	1x	ggplot2::theme_bw(14) +
649	1x	ggplot2::labs(
650	1x	x = format_text("observed concentrations (ug/m3)"),
651	1x	y = format_text("modelled concentrations (ug/m3)"),
652	1x	fill = NULL,
653	1x	color = NULL
654		) +
655	1x	ggplot2::theme(
656	1x	legend.justification = "left",
657	1x	legend.spacing = ggplot2::unit(0.2, "cm"),
658	1x	legend.margin = ggplot2::margin(0, 0, 0, 0, unit = "cm"),
659	1x	legend.title = ggplot2::element_text(face = "bold")
660		) +
661	1x	ggplot2::scale_color_manual(
662	1x	values = c(
663	1x	"fixed" = color_fixed,
664	1x	"indicative" = color_indicative
665		),
666	1x	aesthetics = c("fill", "colour")
667		) +
668	1x	ggplot2::guides(
669	1x	fill = ggplot2::guide_legend(position = "top"),
670	1x	custom_mqi = ggplot2::guide_custom(grob_mqi, order = 1),
671	1x	custom_a = ggplot2::guide_custom(
672	1x	grob_a,
673	1x	order = 2,
674	1x	title = "\nUncertainty\nParameters"
675		),
676	1x	custom_b = ggplot2::guide_custom(grob_b, order = 3),
677	1x	custom_rv = ggplot2::guide_custom(grob_rv, order = 4),
678	1x	custom_urv = ggplot2::guide_custom(grob_urv, order = 5),
679	1x	custom_siu = ggplot2::guide_custom(
680	1x	grob_siu,
681	1x	order = 6,
682	1x	title = "\nComplementary\nIndicators"
683		),
684	1x	custom_sib = ggplot2::guide_custom(grob_sib, order = 7),
685	1x	custom_sir = ggplot2::guide_custom(grob_sir, order = 8),
686	1x	custom_sis = ggplot2::guide_custom(grob_sis, order = 9)
687		)
688		}
689		}
690
691	6x	return(plt)
692		}
693
694		#' @author 'Trevor' on StackOverflow
695		#' @source https://stackoverflow.com/questions/6862742/draw-a-circle-with-ggplot2
696		#' @noRd
697		make_circle <- function(r, xc = 0, yc = 0) {
698	12x	x <- xc + r * cos(seq(0, pi, length.out = 100))
699	12x	ymax <- yc + r * sin(seq(0, pi, length.out = 100))
700	12x	ymin <- yc + r * sin(seq(0, -pi, length.out = 100))
701	12x	data.frame(x = x, ymin = ymin, ymax = ymax)
702		}
703
704		#' Comma delimit two optional values
705		#' @noRd
706		combine <- function(a, b, percentage = FALSE) {
707	24x	if (percentage) {
708	3x	if (!is.null(a)) {
709	3x	a <- paste0(a * 100, "%")
710		}
711	3x	if (!is.null(b)) {
712	1x	b <- paste0(b * 100, "%")
713		}
714		}
715	24x	paste(c(a, b) \|> unique(), collapse = ", ")
716		}
717
718		#' Format performance indicators
719		#' @noRd
720		fmt_pi <- function(vec, x) {
721	20x	paste(round(vec[[x]], 2), collapse = ", ")
722		}

1		#' Conveniently construct a set of parameters for use in
2		#' [mqor][mqor-package] functions
3		#'
4		#' This is a convenience function for assembling complete sets of parameters for
5		#' use across `mqor`. It allows parameter sets to be simply re-used across
6		#' functions. This function simply returns an R list which is indexed by
7		#' `mqor` functions, meaning that users could construct the themselves, or even
8		#' pass a row of a dataframe in its stead.
9		#'
10		#' @param rv Reference value. A numeric value, used to define the
11		#' relationship between measurement uncertainty and concentration level, for
12		#' each pollutant and time aggregation. `rv` is expressed in the same units as
13		#' the concentration of the pollutant.
14		#'
15		#' @param u_rv Maximum accepted relative measurement uncertainty. A numeric
16		#' value, expressed as a percent (`0.1` is equivalent to 10%).
17		#'
18		#' @param a The alpha parameter. Fraction of the maximum accepted
19		#' measurement uncertainty that is not proportional to the concentration,
20		#' ranging between `0` and `1`. It is pollutant and measurement-type specific.
21		#'
22		#' @param b The beta parameter. Ratio between the maximum accepted modelling
23		#' and measurement uncertainties for short- and long-term averages. It is
24		#' pollutant and measurement-type specific, and it determines the stringency
25		#' of the MQO. The stringency factor β corresponds to the maximum ratio of
26		#' uncertainty of modelling applications as specified in the AAQD.
27		#'
28		#' @returns a list of length `5`, with names `term`, `rv`, `u_rv`, `a`, and `b`.
29		#'
30		#' @family parameter constructors
31		#'
32		#' @export
33		mqo_params <- function(
34		rv = NULL,
35		u_rv = NULL,
36		a = NULL,
37		b = NULL
38		) {
39		if (
40	12x	is.null(rv) \|\|
41	12x	is.null(u_rv) \|\|
42	12x	is.null(a) \|\|
43	12x	is.null(b)
44		) {
45	!	cli::cli_abort(
46	!	c(
47	!	"i" = "Please pass a complete set of analysis parameters using {.fun mqor::mqo_params} to the {.field params} argument."
48		),
49	!	call = NULL
50		)
51		}
52
53	12x	list(
54	12x	rv = rv,
55	12x	u_rv = u_rv,
56	12x	a = a,
57	12x	b = b
58		)
59		}
60
61		#' Access 'default' parameters for use in [mqor][mqor-package] functions
62		#'
63		#' This function provides an alternative to [mqo_params()] by allowing users to
64		#' simply filter the [default_params] dataset to create a [mqo_params()]-like
65		#' output suitable for the `params` argument of other `mqor` functions.
66		#'
67		#' @param term,type,pollutant Any of the options present in
68		#' `default_params$term`, `default_params$type` or `default_params$pollutant`.
69		#' Used to filter the [default_params] dataset.
70		#'
71		#' @param resolution An optional value present in `default_params$time`, used to
72		#' distinguish between parameters when multiple short-term time averages are
73		#' present.
74		#'
75		#' @returns a list of length `5`, with names `term`, `rv`, `u_rv`, `a`, and `b`.
76		#'
77		#' @family parameter constructors
78		#'
79		#' @export
80		mqo_params_default <- function(
81		term = "",
82		type = "",
83		pollutant = "",
84		resolution = NULL
85		) {
86	12x	i_term <- rlang::arg_match(term, unique(mqor::default_params$term))
87	12x	i_type <- rlang::arg_match(type, unique(mqor::default_params$type))
88	12x	pollutant <- tolower(pollutant)
89	12x	pollutant[pollutant == "pm25"] <- "pm2.5"
90
91	12x	if (!missing(pollutant) && !pollutant %in% mqor::default_params$pollutant) {
92	!	cli::cli_abort(
93	!	"Cannot identify {.field pollutant}; please use {.fun mqor::mqo_params} or {.fun mqor::mqo_params_default} to construct a parameter set."
94		)
95		}
96
97	12x	i_pollutant <- rlang::arg_match(
98	12x	pollutant,
99	12x	unique(mqor::default_params$pollutant)
100		)
101
102	12x	out <-
103	12x	dplyr::filter(
104	12x	mqor::default_params,
105	12x	term == i_term,
106	12x	type == i_type,
107	12x	pollutant == i_pollutant
108		)
109
110	12x	if (nrow(out) > 1) {
111	!	if (is.null(resolution)) {
112	!	cli::cli_abort(
113	!	"Multiple possible parameters; please provide one of {.code {out$time}} to {.field resolution}."
114		)
115		} else {
116	!	rlang::arg_match(resolution, out$time)
117	!	out <- dplyr::filter(out, .data$time == resolution)
118		}
119		}
120
121	12x	mqo_params(
122	12x	rv = out$rv,
123	12x	u_rv = out$u_rv,
124	12x	a = out$a,
125	12x	b = out$b
126		)
127		}
128
129		#' Convenience function for labelling columns in input `data`
130		#'
131		#' This function provides an interface to easily create a 'data dictionary' so
132		#' that `{mqor}` functions can correctly identify where observed values,
133		#' modelled values, site IDs, etc. are found in an input `data.frame`.
134		#'
135		#' @param obs,mod A character string to identify the columns associated with the
136		#' observed (measured) (`obs`) and modelled (`mod`) concentrations. These will
137		#' be used alongside `params` to calculate relevant metrics.
138		#'
139		#' @param site A character string to identify the column to group `obs` and
140		#' `mod` by when calculating statistics. This does not strictly have to be a
141		#' site name; a site ID or abbreviation would suffice, as long as the labels
142		#' uniquely define separate sampling points.
143		#'
144		#' @param type A character string to identify the column to assign sites as
145		#' either `"fixed"` or `"indicative"`.
146		#'
147		#' @param pollutant A character string to identify the column which
148		#' distinguishes between different pollutant species. This column will be used
149		#' to filter the data before it is calculated.
150		#'
151		#' @param date A character to identify a `POSIXct`/`Date` column. Only relevant
152		#' for short-term data.
153		#'
154		#' @returns a list of length `5`, with names `obs`, `mod`, `site`, `type`,
155		#' `pollutant` and `date`.
156		#'
157		#' @export
158		mqo_dict <- function(
159		obs = "obs",
160		mod = "mod",
161		site = "site",
162		type = "type",
163		pollutant = "param",
164		date = "date"
165		) {
166	14x	list(
167	14x	obs = obs,
168	14x	mod = mod,
169	14x	site = site,
170	14x	type = type,
171	14x	pollutant = pollutant,
172	14x	date = date
173		)
174		}

1		#' Read Short- and Long-Term data in the recommended MQOR data structure
2		#'
3		#' This function reads one or more data files as well as an optional attributes
4		#' file, combines them together, and formats them in a way useful for the
5		#' `{mqor}` package. Short- and Long-Term data will be distinguished using the
6		#' presence of the `"Month"` column, which should only exist in short-term data.
7		#'
8		#' @param file_data Path(s) to one or more CSV files containing short- or
9		#' long-term data, formatted as requested by the DELTA tool. A mix of short-
10		#' and long- term data will fail.
11		#'
12		#' @param file_attributes An optional path to an 'attributes' CSV file, which
13		#' must contain a `"Time_Series"` column used to join the two datasets
14		#' together. The attributes data can contain metadata about the main datasets,
15		#' such as the name of the model, monitoring station coordinates, and so on.
16		#'
17		#' @param delim The delimiter used to separate columns in the input files. The
18		#' default is `";"`.
19		#'
20		#' @author Jack Davison
21		#'
22		#' @export
23		read_mqor <- function(
24		file_data,
25		file_attributes = NULL,
26		delim = ";"
27		) {
28		# read all data files
29	!	df <-
30	!	purrr::map(
31	!	file_data,
32	!	\(x) {
33	!	data.table::fread(x, sep = delim) \|>
34	!	dplyr::mutate(Time_Series = basename(x))
35		}
36		) \|>
37	!	dplyr::bind_rows() \|>
38	!	tibble::tibble()
39
40		# is it short or longterm?
41	!	if ("Month" %in% names(df)) {
42		# if hour is missing, will be daily data - add it back here
43	!	if (!"Hour" %in% names(df)) {
44	!	df$Hour <- 0
45		}
46		# combine y/m/d/h into date
47	!	df <-
48	!	dplyr::mutate(
49	!	df,
50	!	date = lubridate::make_datetime(
51	!	year = .data$Year,
52	!	month = .data$Month,
53	!	day = .data$Day,
54	!	hour = .data$Hour,
55	!	tz = "UTC"
56		),
57	!	.after = "Pollutant"
58		) \|>
59	!	dplyr::select(
60	!	-dplyr::any_of(c("Month", "Day", "Hour"))
61		)
62		}
63
64		# if attributes given, read in and join
65	!	if (!is.null(file_attributes)) {
66	!	attributes <-
67	!	data.table::fread(file_attributes, sep = delim) \|>
68	!	tibble::tibble()
69
70	!	df <-
71	!	dplyr::left_join(
72	!	df,
73	!	attributes,
74	!	by = dplyr::join_by("Time_Series", "Year", "Pollutant", "Stat_Code"),
75	!	suffix = c("", "__mqorextra"),
76	!	relationship = "many-to-one"
77		) \|>
78	!	dplyr::select(
79	!	-dplyr::ends_with("__mqorextra")
80		) \|>
81	!	dplyr::rename(
82	!	type = "Measurement_Type"
83		)
84		}
85
86		# rename some columns for convenience
87	!	df <-
88	!	df \|>
89	!	dplyr::rename(
90	!	stat_code = "Stat_Code",
91	!	param = "Pollutant",
92	!	obs = "Observation",
93	!	mod = "Model"
94		) \|>
95	!	dplyr::rename_with(tolower)
96
97		# if there's a "year" column, make it into "date"
98	!	if (!"date" %in% names(df)) {
99	!	names(df)[names(df) == "year"] <- "date"
100		} else {
101	!	df$year <- NULL
102		}
103
104		# return data
105	!	return(df)
106		}
107
108		#' Write Short- and Long-Term data in the recommended MQOR data structure
109		#'
110		#' This function takes a `data.frame` already in R and writes it to the `{mqor}`
111		#' flat file structure.
112		#'
113		#' @inheritParams data.table::fwrite
114		#'
115		#' @param data A `data.frame` to write to a file.
116		#'
117		#' @param delim The delimiter used to separate columns in the files. The default
118		#' is `";"`.
119		#'
120		#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary
121		#' to specify the columns in the data `{mqor}` should use.
122		#'
123		#' @author Jack Davison
124		#'
125		#' @export
126		write_mqor <- function(data, file = "", delim = ";", dict = mqor::mqo_dict()) {
127	!	data <-
128	!	dplyr::select(
129	!	data,
130	!	dplyr::all_of(c(
131	!	dict$site,
132	!	dict$pollutant,
133	!	dict$date,
134	!	dict$obs,
135	!	dict$mod
136		))
137		)
138
139	!	names(data)[names(data) == dict$obs] <- "Observation"
140	!	names(data)[names(data) == dict$mod] <- "Model"
141	!	names(data)[names(data) == dict$site] <- "Stat_Code"
142	!	names(data)[names(data) == dict$pollutant] <- "Pollutant"
143
144	!	if (inherits(data[[dict$date]], c("POSIXct", "Date"))) {
145	!	data <-
146	!	dplyr::mutate(
147	!	data,
148	!	Year = lubridate::year(.data[[dict$date]]),
149	!	Month = lubridate::month(.data[[dict$date]]),
150	!	Day = lubridate::day(.data[[dict$date]]),
151	!	Hour = lubridate::hour(.data[[dict$date]]),
152	!	.after = "Pollutant",
153	!	.keep = "unused"
154		)
155
156	!	if (all(data$Hour == 0)) {
157	!	data$Hour <- NULL
158		}
159	!	} else if (is.numeric(data[[dict$date]])) {
160	!	names(data)[names(data) == dict$date] <- "Year"
161		}
162
163	!	data.table::fwrite(data, file = file, sep = delim)
164	!	return(invisible(data))
165		}

1		#' Function to read CDF files in the DELTA format
2		#'
3		#' This function uses [ncdf4][ncdf4::ncdf4-package] to read a CDF file of
4		#' short-term air quality data into R as a `data.frame` for further analysis in
5		#' an `{mqor}` analysis pipeline. Alternatively, users can define a directory of
6		#' delimited text files with [read_delta_data_delim()].
7		#'
8		#' @param file A path to a .cdf file, formatted as requested by the DELTA tool.
9		#'
10		#' @param data_type One of `"obs"` or `"mod"`; used to correctly label the data
11		#' as being observations or modelled data for later analysis.
12		#'
13		#' @return a [tibble][tibble::tibble-package]
14		#'
15		#' @family delta reading functions
16		#'
17		#' @export
18		read_delta_data_cdf <- function(file, data_type) {
19	!	rlang::check_installed("ncdf4")
20
21	!	data_type <- rlang::arg_match(data_type, c("obs", "mod"))
22
23	!	nc <- ncdf4::nc_open(file)
24
25	!	model <- gsub(".cdf", "", basename(file))
26
27	!	on.exit(ncdf4::nc_close(nc))
28
29	!	params <-
30	!	ncdf4::ncatt_get(nc, varid = 0)$Parameters \|>
31	!	as.raw() \|>
32	!	rawToChar() \|>
33	!	strsplit(" ") \|>
34	!	(\(x) x[[1]])()
35
36	!	vars <- names(nc$var)
37
38	!	year <- ncdf4::ncatt_get(nc, 0)$Year
39	!	start <- ncdf4::ncatt_get(nc, 0)$StartHour + 1L
40	!	end <- ncdf4::ncatt_get(nc, 0)$EndHour + 1L
41
42	!	dates <-
43	!	seq(
44	!	ISOdate(
45	!	year = year,
46	!	month = 1,
47	!	day = 1,
48	!	hour = 0,
49	!	tz = "GMT"
50		),
51	!	ISOdate(
52	!	year = year,
53	!	month = 12,
54	!	day = 31,
55	!	hour = 23,
56	!	tz = "GMT"
57		),
58	!	by = "hour"
59	!	)[start:end]
60
61	!	data <-
62	!	purrr::map(
63	!	vars,
64	!	function(x) {
65	!	array <- ncdf4::ncvar_get(nc, x)
66	!	array \|>
67	!	t() \|>
68	!	as.data.frame() \|>
69	!	stats::setNames(params) \|>
70	!	dplyr::tibble() \|>
71	!	dplyr::mutate(site = x, date = dates, .before = 0L)
72		},
73	!	.progress = TRUE
74		) \|>
75	!	dplyr::bind_rows() \|>
76	!	tidyr::pivot_longer(
77	!	-c("site", "date"),
78	!	names_to = "param",
79	!	values_to = data_type
80		)
81
82		# drop invalid values
83	!	data[data_type][data[data_type] == -999] <- NA
84
85	!	if (data_type == "mod") {
86		# if modelled data, add a 'model' column
87	!	data <- dplyr::mutate(data, model = model, .before = dplyr::everything())
88		}
89
90	!	return(data)
91		}
92
93		#' Function to read CSV files in the DELTA format
94		#'
95		#' This function uses [data.table::fread()] to read a directory of tabular files
96		#' of short-term air quality data into R as a `data.frame` for further analysis
97		#' in an `{mqor}` analysis pipeline. Alternatively, users can read a single
98		#' netCDF file with [read_delta_data_cdf()].
99		#'
100		#' @param path A path to a directory of delimited files, formatted as requested
101		#' by the DELTA tool.
102		#' @param data_type One of `"obs"` or `"mod"`; used to correctly label the data
103		#' as being observations or modelled data for later analysis.
104		#' @param model_name If `data_type = "mod"`, this value will be used to assign a
105		#' model name to the data.
106		#' @param delim The delimited used to separate columns in the input files. The
107		#' default, `;`, is prescribed by the DELTA tool.
108		#' @param pattern Passed to [dir()] to identify relevant files in `path`. Also
109		#' used to clean the file names to attach site names to the data frames.
110		#'
111		#' @return a [tibble][tibble::tibble-package]
112		#'
113		#' @family delta reading functions
114		#'
115		#' @export
116		read_delta_data_delim <- function(
117		path,
118		data_type,
119		model_name = "MODEL",
120		delim = ";",
121		pattern = ".csv"
122		) {
123	!	data_type <- rlang::arg_match(data_type, c("obs", "mod"))
124
125	!	data <-
126	!	purrr::map2(
127	!	.x = dir(path, full.names = TRUE, pattern = pattern),
128	!	.y = dir(path, full.names = FALSE, pattern = pattern),
129	!	.f = function(x, y) {
130	!	data.table::fread(x, sep = delim, na.strings = "-999") \|>
131	!	dplyr::tibble() \|>
132	!	dplyr::mutate(site = gsub(pattern, "", y), .before = 0L)
133		},
134	!	.progress = TRUE
135		) \|>
136	!	dplyr::bind_rows() \|>
137	!	dplyr::mutate(
138	!	date = ISOdate(
139	!	year = .data$year,
140	!	month = .data$month,
141	!	day = .data$day,
142	!	hour = .data$hour,
143	!	tz = "GMT"
144		),
145	!	.keep = "unused",
146	!	.after = "site"
147		)
148
149	!	for (i in names(data)) {
150	!	if (all(is.na(data[[i]]))) {
151	!	data[i] <- NULL
152		}
153		}
154
155	!	data <-
156	!	tidyr::pivot_longer(
157	!	data,
158	!	-c("site", "date"),
159	!	names_to = "param",
160	!	values_to = data_type
161		)
162
163	!	if (data_type == "mod") {
164		# if modelled data, add a 'model' column
165	!	data <- dplyr::mutate(
166	!	data,
167	!	model = model_name,
168	!	.before = dplyr::everything()
169		)
170		}
171
172	!	return(data)
173		}
174
175		#' Function to read yearly CSV files in the DELTA format
176		#'
177		#' This function reads yearly data formatted as required by the DELTA tool,
178		#' structured in a single file. Monitoring data structured in multiple files can
179		#' alternatively be imported using [read_delta_yearly_dir()].
180		#'
181		#' @param file The path to a single CSV file containing yearly data, formatted
182		#' as requested by the DELTA tool.
183		#'
184		#' @inheritParams read_delta_data_delim
185		#'
186		#' @return a [tibble][tibble::tibble-package]
187		#'
188		#' @family delta reading functions
189		#'
190		#' @export
191		read_delta_yearly_file <- function(
192		file,
193		data_type,
194		model_name = "MODEL",
195		delim = ";"
196		) {
197	!	data_type <- rlang::arg_match(data_type, c("obs", "mod"))
198
199	!	lines <- readLines(file)
200	!	lines <- lines[!startsWith(lines, delim)]
201
202	!	colnames <- strsplit(lines[1], ";")[[1]]
203	!	data <-
204	!	utils::read.table(
205	!	text = lines[-1],
206	!	sep = delim,
207	!	col.names = c("site", colnames[-(1:2)])
208		) \|>
209	!	dplyr::mutate(date = as.integer(colnames[2])) \|>
210	!	tidyr::pivot_longer(
211	!	-c("site", "date"),
212	!	names_to = "param",
213	!	values_to = data_type
214		)
215
216	!	if (data_type == "mod") {
217		# if modelled data, add a 'model' column
218	!	data <- dplyr::mutate(
219	!	data,
220	!	model = model_name,
221	!	.before = dplyr::everything()
222		)
223		}
224
225	!	data[[data_type]][data[[data_type]] == -999] <- NA
226
227	!	return(data)
228		}
229
230		#' Function to read a directory of yearly CSV files in the DELTA format
231		#'
232		#' This function reads yearly data formatted as required by the DELTA tool,
233		#' structured across multiple files. This format is only supported for
234		#' monitoring data. Data structured in a single file can
235		#' alternatively be imported using [read_delta_yearly_file()].
236		#'
237		#' @inheritParams read_delta_data_delim
238		#'
239		#' @return a [tibble][tibble::tibble-package]
240		#'
241		#' @family delta reading functions
242		#'
243		#' @export
244		read_delta_yearly_dir <- function(
245		path,
246		delim = ";",
247		pattern = ".csv"
248		) {
249	!	files <- dir(path, full.names = TRUE, pattern = pattern)
250
251	!	read_file <- function(file) {
252	!	lines <- readLines(file)
253	!	lines <- lines[!startsWith(lines, delim)]
254
255	!	colnames <- strsplit(lines[1], ";")[[1]]
256
257	!	utils::read.table(
258	!	text = lines[2],
259	!	sep = ";",
260	!	col.names = colnames[-(1:2)]
261		) \|>
262	!	dplyr::mutate(
263	!	date = as.integer(colnames[2]),
264	!	site = gsub(pattern, "", basename(file))
265		) \|>
266	!	tidyr::pivot_longer(
267	!	-c("site", "date"),
268	!	names_to = "param",
269	!	values_to = "obs"
270		) \|>
271	!	dplyr::mutate(obs = ifelse(.data$obs == -999, NA, .data$obs))
272		}
273
274	!	purrr::map(files, read_file) \|>
275	!	dplyr::bind_rows()
276		}

1		#' Function to read DELTA tool resource files
2		#'
3		#' These functions read selected files from the DELTA tool's `resource`
4		#' directory. The most useful file for `{mqor}` is likely the `startup` file as
5		#' this contains vital monitoring station metadata, but this function also
6		#' supports the `MyDeltaInput.ini`, `update.ini` and `init.ini` files for
7		#' completeness.
8		#'
9		#' @param file A path to an appropriate DELTA tool resource file; one of
10		#' `init.ini`, `MyDeltaInput.dat`, `startup_*.ini`, or `update.ini`.
11		#'
12		#' @return a list
13		#'
14		#' @family delta reading functions
15		#'
16		#' @export
17		read_delta_resource <- function(file) {
18	!	if (grepl("startup", file)) {
19	!	lines <- readLines(file)
20
21	!	id <- which(lines %in% c("[MODEL]", "[PARAMETERS]", "[MONITORING]"))
22
23	!	model_lines <- lines[(id[1] + 1L):(id[2] - 1L)]
24	!	model_lines <- model_lines[!grepl(";", model_lines)]
25
26	!	year <- as.integer(model_lines[1])
27	!	frequency <- model_lines[2]
28	!	scale <- model_lines[3]
29
30	!	param_lines <- lines[(id[2] + 1L):(id[3] - 1L)]
31	!	param_lines <- param_lines[!startsWith(param_lines, ";")]
32	!	params <- data.table::fread(text = c("param;type;unit", param_lines)) \|>
33	!	dplyr::tibble()
34
35	!	monitoring_lines <- lines[(id[3] + 1L):length(lines)]
36	!	monitoring_lines[!endsWith(monitoring_lines, ";")] <- paste0(
37	!	monitoring_lines[!endsWith(monitoring_lines, ";")],
38		";"
39		)
40
41	!	monitoring <-
42	!	data.table::fread(text = monitoring_lines) \|>
43	!	dplyr::tibble() \|>
44	!	dplyr::select(-dplyr::any_of("V13")) \|>
45	!	dplyr::rename_with(.fn = function(x) gsub(" ", "_", tolower(x))) \|>
46	!	dplyr::mutate(
47	!	listofvariables = strsplit(.data$listofvariables, "\\*")
48		)
49
50	!	out <- list(
51	!	year = year,
52	!	frequency = frequency,
53	!	scale = scale,
54	!	params = params,
55	!	monitoring = monitoring
56		)
57	!	} else if (grepl("update.ini", file) \|\| grepl("init.ini", file)) {
58	!	x <- readLines(file)
59	!	x <- x[
60	!	!startsWith(x, ";") & !startsWith(x, "#") & !startsWith(x, "[") & x != ""
61		]
62	!	out <-
63	!	strsplit(x, "=") \|>
64	!	purrr::map(.f = \(x) list(x[[2]]) \|> stats::setNames(x[[1]])) \|>
65	!	purrr::flatten()
66	!	} else if (grepl("MyDeltaInput", file)) {
67	!	x <- readLines(file)
68	!	x <- x[
69	!	!startsWith(x, ";") & !startsWith(x, "#") & !startsWith(x, "[") & x != ""
70		]
71	!	x <- gsub("\\", "/", x, fixed = TRUE)
72	!	out <- as.list(stats::setNames(x, c("startup", "modeling", "monitoring")))
73		} else {
74	!	cli::cli_abort(
75	!	"File not recognised; please provide one of the 'init.ini', 'MyDeltaInput.dat', 'startup_*.ini' or 'update.ini' files found in the DELTA 'resource' directory."
76		)
77		}
78
79	!	out$file <- file
80
81	!	return(out)
82		}

1		#' Convert the 'observation' and 'modelled' columns of a `mqor`-ready dataset
2		#' into rolling means
3		#'
4		#' This function takes a dataset prepared for `mqo` calculations and replaces
5		#' the `observation` and `modelled` data columns with a rolling equivalent.
6		#' Rolling values are calculated within groups of any categorical columns
7		#' present in the data (likely the "pollutant" and "site" columns, along with
8		#' any metadata).
9		#'
10		#' @inheritParams summarise_mqo_stats
11		#'
12		#' @param window_size Integer vector giving rolling window size(s). This is the
13		#' total number of included values. Defaults to `8L`, which is the most useful
14		#' for ozone statistics.
15		#'
16		#' @param min_coverage The minimum data coverage percent, expressed as a decimal
17		#' (i.e., this option should be between `0` and `1`, representing 0% and 100%).
18		#'
19		#' @param progress Show a progress bar? Passed to [purrr::map()].
20		#'
21		#' @seealso `openair::rollingMean()` for a more flexible and performant version
22		#' of `mutate_rolling_mean()`.
23		#'
24		#' @family data utilities
25		#'
26		#' @export
27		mutate_rolling_mean <- function(
28		data,
29		window_size = 8L,
30		min_coverage = 0.75,
31		progress = TRUE,
32		dict = mqor::mqo_dict()
33		) {
34	!	validate_coverage(min_coverage)
35	!	data <- dplyr::arrange(data, .data[[dict$date]])
36
37	!	cat_columns <-
38	!	data \|>
39	!	dplyr::select(dplyr::where(is.character) \| dplyr::where(is.factor)) \|>
40	!	names()
41
42	!	purrr::map(
43	!	split(data, data[c(cat_columns)], drop = TRUE),
44	!	\(x) {
45	!	x[[dict$obs]] <- data.table::frollmean(
46	!	x = x[[dict$obs]],
47	!	n = window_size,
48	!	align = "right"
49		)
50
51	!	x[[dict$mod]] <- data.table::frollmean(
52	!	x = x[[dict$mod]],
53	!	n = window_size,
54	!	align = "right"
55		)
56
57	!	flag_obs <- !is.na(x[[dict$obs]])
58	!	obs_dc <- data.table::frollmean(
59	!	x = flag_obs,
60	!	n = window_size,
61	!	align = "right"
62		)
63
64	!	flag_mod <- !is.na(x[[dict$mod]])
65	!	mod_dc <- data.table::frollmean(
66	!	x = flag_mod,
67	!	n = window_size,
68	!	align = "right"
69		)
70
71	!	x[[dict$obs]] <- dplyr::if_else(obs_dc <= min_coverage, NA, x[[dict$obs]])
72	!	x[[dict$mod]] <- dplyr::if_else(mod_dc <= min_coverage, NA, x[[dict$mod]])
73
74	!	return(x)
75		},
76	!	.progress = progress
77		) \|>
78	!	dplyr::bind_rows()
79		}
80
81		#' Calculate daily summaries of a `mqor`-ready dataset
82		#'
83		#' These functions take a dataset prepared for `mqo` calculations and calculates
84		#' a daily or annual mean, maximum or minimum value. Averages are calculated
85		#' within groups of any categorical columns present in the data (likely the
86		#' "pollutant" and "site" columns, along with any metadata).
87		#'
88		#' @inheritParams summarise_mqo_stats
89		#'
90		#' @param statistic One of `"mean"`, `"min"`, or `"max"`, representing the
91		#' average, minimum or maximum
92		#'
93		#' @param min_coverage The minimum data coverage percent, expressed as a decimal
94		#' (i.e., this option should be between `0` and `1`, representing 0% and
95		#' 100%). Note that calculations of data coverage can only be calculated for
96		#' the data presented; the user must provide a complete timeseries for an
97		#' accurate coverage calculation.
98		#'
99		#' @seealso `openair::timeAverage()` for a more flexible and performant version
100		#' of `summarise_daily()`.
101		#'
102		#' @family data utilities
103		#'
104		#' @rdname summarise-temporal
105		#' @order 1
106		#' @export
107		summarise_daily <- function(
108		data,
109		statistic = "mean",
110		min_coverage = 0.75,
111		dict = mqor::mqo_dict()
112		) {
113	!	summarise_temporal(
114	!	data = data,
115	!	statistic = statistic,
116	!	min_coverage = min_coverage,
117	!	dict = dict,
118	!	floor = "day"
119		)
120		}
121
122		#' @rdname summarise-temporal
123		#' @order 2
124		#' @export
125		summarise_annual <- function(
126		data,
127		statistic = "mean",
128		min_coverage = 0.75,
129		dict = mqor::mqo_dict()
130		) {
131	!	data <-
132	!	summarise_temporal(
133	!	data = data,
134	!	statistic = statistic,
135	!	min_coverage = min_coverage,
136	!	dict = dict,
137	!	floor = "year"
138		)
139
140	!	data[[dict$date]] <- lubridate::year(data[[dict$date]])
141
142	!	return(data)
143		}
144
145		#' @noRd
146		summarise_temporal <- function(
147		data,
148		statistic,
149		min_coverage,
150		dict,
151		floor = c("day", "year")
152		) {
153	!	validate_coverage(min_coverage)
154	!	fun <- get_fun(statistic)
155
156		# get categorical columns to summarise by
157	!	cat_columns <-
158	!	data \|>
159	!	dplyr::select(dplyr::where(is.character) \| dplyr::where(is.factor)) \|>
160	!	names()
161
162		# summarise data
163	!	out <-
164	!	data \|>
165	!	dplyr::mutate(date = lubridate::floor_date(.data[[dict$date]], floor)) \|>
166	!	dplyr::summarise(
167	!	obs = fun(.data[[dict$obs]], min_coverage),
168	!	mod = fun(.data[[dict$mod]], min_coverage),
169	!	.by = dplyr::all_of(c("date", cat_columns))
170		)
171
172		# restore original modelled/observed names
173	!	names(out)[names(out) == "mod"] <- dict$mod
174	!	names(out)[names(out) == "obs"] <- dict$obs
175	!	names(out)[names(out) == "date"] <- dict$date
176
177		# return data
178	!	return(out)
179		}
180
181
182		#' @noRd
183		get_fun <- function(statistic) {
184	!	if (statistic == "mean") {
185	!	fun <- safe_mean
186	!	} else if (statistic == "max") {
187	!	fun <- safe_max
188	!	} else if (statistic == "min") {
189	!	fun <- safe_min
190		}
191		}
192
193		#' @noRd
194		safe_mean <- function(x, cov) {
195	!	if (all(is.na(x)) \| mean(!is.na(x)) < cov) {
196		NA
197		} else {
198	!	mean(x, na.rm = TRUE)
199		}
200		}
201
202		#' @noRd
203		safe_max <- function(x, cov) {
204	!	if (all(is.na(x)) \| mean(!is.na(x)) < cov) {
205		NA
206		} else {
207	!	max(x, na.rm = TRUE)
208		}
209		}
210
211		#' @noRd
212		safe_min <- function(x, cov) {
213	!	if (all(is.na(x)) \| mean(!is.na(x)) < cov) {
214		NA
215		} else {
216	!	max(x, na.rm = TRUE)
217		}
218		}
219
220		#' @noRd
221		validate_coverage <- function(min_coverage) {
222	!	if (min_coverage < 0 \|\| min_coverage > 1 \|\| !is.numeric(min_coverage)) {
223	!	cli::cli_abort(
224	!	"{.field min_coverage} must be a number between {0L} and {1L}."
225		)
226		}
227		}
228
229		#' Validate that all observations have a matching modelled value, and vice-versa
230		#'
231		#' When calculating statistics, it is important that the methodology is applied
232		#' to observation-model pairs. This function can help identify and fix this situation.
233		#'
234		#' @param data A `data.frame` with numeric columns `dict$obs` and `dict$mod`.
235		#'
236		#' @param mode One of `"fix"` (the default), `"error"` or `"warn"`. If `"fix"`
237		#' the function will return a dataframe with any mismatched values set to
238		#' `NA`. If `"error"` or `"warn"`, the the function will either error or warn
239		#' if mismatched values are detected.
240		#'
241		#' @inheritParams summarise_mqo_stats
242		#'
243		#' @author Jack Davison
244		#' @family data utilities
245		#' @export
246		validate_mod_obs_pairs <- function(
247		data,
248		mode = c("fix", "error", "warn"),
249		dict = mqor::mqo_dict()
250		) {
251	!	mode <- rlang::arg_match(mode)
252
253	!	if (mode == "fix") {
254	!	data <-
255	!	dplyr::mutate(
256	!	data,
257	!	`__new_obs__` = dplyr::if_else(
258	!	is.na(.data[[dict$mod]]),
259	!	NA,
260	!	.data[[dict$obs]]
261		),
262	!	`__new_mod__` = dplyr::if_else(
263	!	is.na(.data[[dict$obs]]),
264	!	NA,
265	!	.data[[dict$mod]]
266		)
267		)
268
269	!	data[[dict$obs]] <- data[["__new_obs__"]]
270	!	data[[dict$mod]] <- data[["__new_mod__"]]
271	!	data[["__new_mod__"]] <- data[["__new_obs__"]] <- NULL
272		}
273
274	!	if (mode %in% c("error", "warn")) {
275	!	if (mode == "error") {
276	!	fun <- cli::cli_abort
277		} else {
278	!	fun <- cli::cli_warn
279		}
280	!	id <- is.na(data[[dict$mod]]) != is.na(data[[dict$obs]])
281
282	!	if (any(id)) {
283	!	fun(
284	!	"Some modelled values do not have matching observations, or vice-versa."
285		)
286		}
287		}
288
289	!	return(data)
290		}

1		#' Calculate a ranked percentile of a numeric vector
2		#'
3		#' This function provides a convenient method to calculate a percentile value
4		#' from a numeric vector. The vector, `x`, is ranked in ascending order and the
5		#' nth percentile value is inferred by linear interpolation.
6		#'
7		#' @param x A numeric vector from which to calculate a percentile value.
8		#'
9		#' @param quantile A quantile value between `0` and `1`. The default, `0.9`,
10		#' calculates the 90th percentile.
11		#'
12		#' @inheritParams stats::quantile
13		#'
14		#' @family data utilities
15		#'
16		#' @author Jack Davison
17		#'
18		#' @examples
19		#' mqis <- seq(0.1, 1, by = 0.1)
20		#'
21		#' mqo_percentile(mqis)
22		#'
23		#' mqo_percentile(mqis[1:9])
24		#'
25		#' @export
26		mqo_percentile <- function(x, quantile = 0.9, na.rm = FALSE) {
27	20x	if (!na.rm & anyNA(x)) {
28	1x	cli::cli_abort(
29	1x	"Missing values in {.field x} not allowed if {.field na.rm} is {FALSE}."
30		)
31		}
32
33	19x	if (!dplyr::between(quantile, 0, 1)) {
34	1x	cli::cli_abort("{.field quantile} must be between {0} and {1}.")
35		}
36
37	18x	x <- x[!is.na(x)]
38
39	18x	x <- sort(x)
40
41	18x	n <- floor(length(x) * quantile)
42
43	18x	d <- (length(x) * quantile) - n
44
45	18x	return(x[n] + (x[n + 1] - x[n]) * d)
46		}
47
48		#' @noRd
49		determine_term <- function(data, term, dict) {
50	10x	if (is.null(term)) {
51	8x	sites <- data[[dict$site]]
52	8x	x <- as.vector(table(sites))
53	8x	if (any(x > 1)) {
54	4x	term <- "short"
55		} else {
56	4x	term <- "long"
57		}
58	8x	cli::cli_inform(c(
59	8x	"!" = "{.field term} assumed to be '{term}'.",
60	8x	"i" = "If this is incorrect, please specify the data's term using the {.field term} argument."
61		))
62		} else {
63	2x	term <- validate_term(term)
64		}
65
66	9x	return(term)
67		}
68
69		#' @noRd
70		determine_resolution <- function(data, term, dict) {
71	12x	if (term == "long") {
72	6x	return("annual")
73		}
74
75	6x	counts <-
76	6x	purrr::map(
77	6x	split(data, data[[dict$site]]),
78	6x	function(x) {
79	90x	difftime(x[[dict$date]], dplyr::lag(x[[dict$date]]), units = "secs")
80		}
81		) \|>
82	6x	purrr::list_c() \|>
83	6x	table()
84
85	6x	max_splits <- as.numeric(names(sort(counts)[1]))
86
87	6x	if (max_splits == 86400) {
88	6x	return("daily")
89	!	} else if (max_splits == 3600) {
90	!	return("hourly")
91		} else {
92	!	cli::cli_abort(
93	!	"Cannot determine time resolution of data, please use {.fun mqor::mqo_params} or {.fun mqor::mqo_params_default} directly."
94		)
95		}
96		}

1		#' Function to read DELTA tool configuration files
2		#'
3		#' These functions read selected files from the DELTA tool's `configuration`
4		#' directory. These are likely not overly useful for `{mqor}` as it currently
5		#' stands, but may be useful in future if further features of the DELTA toolkit
6		#' are incorporated into `{mqor}` or similar packages.
7		#'
8		#' @param file A path to an appropriate DELTA tool configuration file, e.g.,
9		#' `axis_type.dat`.
10		#'
11		#' @return a list
12		#'
13		#' @family delta reading functions
14		#'
15		#' @export
16		read_delta_config <- function(file) {
17	!	if (grepl("aqibounds", file)) {
18	!	read_aqibounds(file)
19		} else {
20	!	read_config(file)
21		}
22		}
23
24		#' Read any config file
25		#' @noRd
26		read_config <- function(file) {
27		# read raw data - drop anything beginning with ; or #
28	!	x <- readLines(file)
29	!	x <- x[!startsWith(x, ";")]
30	!	x <- x[!startsWith(x, "#")]
31
32		# get first row and format as vector of headers
33	!	header <- x[1]
34	!	header <- gsub("[", "", header, fixed = TRUE)
35	!	header <- gsub("]", "", header, fixed = TRUE)
36	!	header <- strsplit(header, ": ")[[1]][2]
37
38		# some are delimited by commas, some by ;
39	!	delim <- ","
40	!	if (!grepl(",", header)) {
41	!	delim <- ";"
42		}
43
44		# split headers into vector, ignoring delimters within parentheses
45	!	split_outside_parentheses <- function(text) {
46		# Pattern: match a comma not inside parentheses
47	!	pattern <- paste0(delim, "(?![^()]*\\))")
48
49		# Split the string using the pattern
50	!	result <- strsplit(text, pattern, perl = TRUE)[[1]]
51
52		# Trim whitespace
53	!	result <- trimws(result)
54
55	!	return(result)
56		}
57
58		# tidy up names
59	!	header <- split_outside_parentheses(header)
60	!	header <- header[header != "" & header != "\""]
61	!	header <- gsub("\"", "", header, fixed = TRUE)
62
63		# read in data
64	!	if (length(x[-1]) != 0) {
65	!	out <- utils::read.delim(text = x[-1], sep = ";", header = F)
66	!	out <- stats::setNames(out, header[1:ncol(out)])
67
68	!	for (i in names(out)) {
69	!	if (all(is.na(out[[i]]))) {
70	!	out[[i]] <- NULL
71		}
72		}
73		} else {
74		# if empty, return empty dataframe
75	!	out <- data.frame()
76	!	for (i in header) {
77	!	out <- cbind(out, data.frame(character(0)) \|> stats::setNames(i))
78		}
79		}
80
81	!	out$file <- file
82
83	!	return(tibble::tibble(out))
84		}
85
86		#' Special handling for the AQIBounds
87		#' @noRd
88		read_aqibounds <- function(file) {
89	!	x <- readLines(file)
90	!	x <- x[!startsWith(x, ";") & !startsWith(x, "#")]
91	!	x <- x[x != ""]
92	!	aqi_tables <-
93	!	tibble::enframe(x, name = NULL) \|>
94	!	dplyr::mutate(id = ifelse(grepl("ID", .data$value), .data$value, NA)) \|>
95	!	tidyr::fill("id") \|>
96	!	dplyr::distinct() \|>
97	!	split(~id, drop = TRUE)
98
99	!	read_aqi_tbl <- function(aqi) {
100	!	aqi <- aqi$value
101	!	aqi <- aqi[aqi != "END"]
102	!	name <- trimws(strsplit(aqi[1], "=")[[1]][2])
103	!	ncat <- trimws(strsplit(aqi[2], "=")[[1]][2])
104	!	cat_names <- strsplit(trimws(strsplit(aqi[8], "=")[[1]][2]), ",")[[1]] \|>
105	!	trimws()
106
107	!	n <- which(startsWith(aqi, "Cat_Names")) - 1
108
109	!	lims <-
110	!	strsplit(trimws(aqi[3:n]), "=") \|>
111	!	purrr::map(trimws)
112
113	!	purrr::map(
114	!	lims,
115	!	function(x) {
116	!	data.frame(
117	!	value = strsplit(x[[2]], ", ")[[1]] \|> trimws() \|> as.numeric()
118		) \|>
119	!	stats::setNames(x[[1]])
120		}
121		) \|>
122	!	dplyr::bind_cols() \|>
123	!	dplyr::slice_tail(n = -1) \|>
124	!	dplyr::mutate(
125	!	aqi = name,
126	!	category = rev(cat_names),
127	!	.before = dplyr::everything()
128		)
129		}
130
131	!	purrr::map(aqi_tables, read_aqi_tbl) \|>
132	!	dplyr::bind_rows() \|>
133	!	dplyr::tibble() \|>
134	!	tidyr::pivot_longer(
135	!	cols = -c("aqi", "category"),
136	!	values_to = "value",
137	!	names_to = "param"
138		)
139		}

1		#' Plot a bar chart visualising the Modelling Quality Indicator (MQI)
2		#'
3		#' This function produces an ordered bar chart visualising the modelling quality
4		#' indicator (MQI) for each `site` in the dataset. It also draws the 90th
5		#' percentile of MQIs as a horizontal line; if this is below unity (`1`) the
6		#' Modelling Quality Objective has been met. Unlike [plot_comparison_bars()],
7		#' both long-term and short-term MQIs/MQOs are visualised identically - although
8		#' input `params` and calculated `mqi` and `mqo` values will differ.
9		#'
10		#' @inheritParams plot_comparison_bars
11		#'
12		#' @param color_bars,color_outline The colours to use to fill the bars, and for
13		#' the outlines/horizontal lines. Can be expressed as hex codes, or any
14		#' colours listed in [colors()].
15		#'
16		#' @family plotting functions
17		#'
18		#' @author Jack Davison
19		#'
20		#' @examples
21		#' plot_mqi_bars(
22		#' summarise_mqo_stats(demo_longterm, pollutant = "PM10")
23		#' )
24		#'
25		#' plot_mqi_bars(
26		#' summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
27		#' )
28		#'
29		#' @export
30		plot_mqi_bars <- function(
31		stats,
32		color_bars = "grey85",
33		color_outline = "black",
34		interactive = FALSE
35		) {
36	4x	validate_mqor_stats(stats)
37	4x	term <- stats$inputs$term
38
39	4x	stats$by_site$site <- forcats::fct_reorder(
40	4x	stats$by_site$site,
41	4x	stats$by_site$mqi
42		)
43
44	4x	plotdata <- stats$by_site
45
46	4x	mqi90 <- stats$summary$mqi_90
47
48	4x	if (interactive) {
49	2x	rlang::check_installed("plotly")
50
51	2x	plt <-
52	2x	plotly::plot_ly() \|>
53	2x	plotly::add_trace(
54	2x	x = plotdata$site,
55	2x	y = plotdata$mqi,
56	2x	type = "bar",
57	2x	color = I(color_bars),
58	2x	marker = list(line = list(color = color_outline, width = 1))
59		) \|>
60	2x	plotly::add_annotations(
61	2x	y = mqi90 + 0.02,
62	2x	xref = "paper",
63	2x	x = 0.025,
64	2x	text = as.character(round(mqi90, 2)),
65	2x	showarrow = FALSE
66		) \|>
67	2x	plotly::add_annotations(
68	2x	y = 1.02,
69	2x	xref = "paper",
70	2x	x = 0.025,
71	2x	text = "1.00",
72	2x	showarrow = FALSE
73		) \|>
74	2x	plotly::layout(
75	2x	shapes = list(
76	2x	hline(1, color = color_outline, dash = "solid"),
77	2x	hline(mqi90, color = color_outline, dash = "dot")
78		),
79	2x	xaxis = list(
80	2x	title = "Sampling Points"
81		),
82	2x	yaxis = list(
83	2x	title = paste0("MQI<sub>", term, "</sub>"),
84	2x	fixedrange = TRUE
85		)
86		)
87		} else {
88	2x	plt <-
89	2x	ggplot2::ggplot(
90	2x	plotdata,
91	2x	ggplot2::aes(x = .data$site, y = .data$mqi)
92		) +
93	2x	ggplot2::geom_col(
94	2x	width = (2 / 3),
95	2x	fill = color_bars,
96	2x	color = color_outline
97		) +
98	2x	ggplot2::geom_hline(yintercept = 1L, lty = 1, color = color_outline) +
99	2x	ggplot2::geom_hline(yintercept = mqi90, lty = 2, color = color_outline) +
100	2x	ggplot2::theme_bw(14) +
101	2x	ggplot2::scale_y_continuous(
102	2x	expand = ggplot2::expansion(c(0, .1)),
103	2x	sec.axis = ggplot2::sec_axis(~., breaks = c(1, round(mqi90, 2)))
104		) +
105	2x	ggplot2::labs(
106	2x	x = "Sampling Points",
107	2x	y = if (term == "short") {
108	1x	expression(MQI[short])
109	2x	} else if (term == "long") {
110	1x	expression(MQI[long])
111		}
112		)
113		}
114
115	4x	return(plt)
116		}
117
118		#' Helper
119		#' @noRd
120		hline <- function(y = 0, color = "black", dash = "dash") {
121	4x	list(
122	4x	type = "line",
123	4x	x0 = 0,
124	4x	x1 = 1,
125	4x	xref = "paper",
126	4x	y0 = y,
127	4x	y1 = y,
128	4x	line = list(color = color, dash = dash)
129		)
130		}

1		#' @method print mqor_stats
2		#' @export
3		print.mqor_stats <- function(x, ...) {
4	!	cli::cli_h1("{.pkg mqor} Statistics Set")
5	!	if (x$summary$mqo) {
6	!	cli::cli_alert_success("{.field MQO} Passed")
7		} else {
8	!	cli::cli_alert_danger("{.field MQO} Failed")
9		}
10	!	cli::cli_dl(
11	!	c(
12	!	Pollutant = x$inputs$pollutant,
13	!	Term = x$inputs$term
14		)
15		)
16
17	!	cli::cli_h2("Objects")
18	!	cli::cli_ul(id = "mqorlist")
19	!	for (i in names(x)) {
20	!	if (is.data.frame(x[[i]])) {
21	!	cli::cli_li("x{.field ${i}} ({nrow(x[[i]])} row{?s})")
22		} else {
23	!	cli::cli_li("x{.field ${i}}")
24		}
25		}
26	!	cli::cli_end(id = "mqorlist")
27		}
28
29		#' @method summary mqor_stats
30		#' @export
31		summary.mqor_stats <- function(object, ...) {
32	!	object$summary
33		}
34
35		#' @method plot mqor_stats
36		#' @export
37		plot.mqor_stats <- function(x, ...) {
38	!	if (x$inputs$term == "long") {
39	!	plot_mqi_scatter(stats_longterm = x, ...)
40	!	} else if (x$inputs$term == "short") {
41	!	plot_mqi_scatter(stats_shortterm = x, ...)
42		}
43		}
44
45		#' @noRd
46		is.mqor_stats <- function(x) {
47	26x	inherits(x, "mqor_stats")
48		}
49
50		#' @noRd
51		validate_mqor_stats <- function(stats) {
52	32x	if (!is.null(stats) && !is.mqor_stats(stats)) {
53	!	cli::cli_abort(
54	!	c(
55	!	"!" = "This function expects an {.pkg mqor} statistics set generated by {.fun mqor::summarise_mqo_stats}.",
56	!	"i" = "You have provided {.type {stats}}."
57		),
58	!	call = NULL
59		)
60		}
61		}

1		#' Plot a 'report' summarising indicators for both short- and long-term model
2		#' quality objectives
3		#'
4		#' This function produces a series of one-dimensional scatter plots reporting
5		#' all of the spatial and temporal indicators, as well as the MQIs, contained
6		#' within short- and long-term statistics objects. In non-interactive plots, a
7		#' dotted vertical line indicates the 90th percentile (for MQIs & TIs) or
8		#' maximum (for SIs) values for each statistic.
9		#'
10		#' @inheritParams plot_mqi_scatter
11		#'
12		#' @param color_target The colour to use for highlighting the target range for
13		#' each statistic (always `0` to `1`). This colour will recieve an alpha value
14		#' of `0.15` to ensure the scatter markers are still visible against it.
15		#'
16		#' @param censor The maximum value to show in the 'report'. Any values above
17		#' this will be reduced to this value. The default, `2`, is twice the size of
18		#' the objective of `1` and is appropriate for most use cases.
19		#'
20		#' @family plotting functions
21		#'
22		#' @author Jack Davison
23		#'
24		#' @returns a [patchwork][patchwork::patchwork-package] assemblage
25		#'
26		#' @examples
27		#' long <- summarise_mqo_stats(demo_longterm, pollutant = "PM10")
28		#'
29		#' short <- summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
30		#'
31		#' plot_mqi_report(short, long)
32		#'
33		#' @export
34		plot_mqi_report <- function(
35		stats_shortterm = NULL,
36		stats_longterm = NULL,
37		color_fixed = "#4269D0",
38		color_indicative = "#EFB118",
39		color_outline = "black",
40		color_target = "green4",
41		censor = 2,
42		interactive = FALSE
43		) {
44	2x	validate_mqor_stats(stats_shortterm)
45	2x	validate_mqor_stats(stats_longterm)
46
47	2x	if (!interactive) {
48	1x	plot_pi_bar <- function(data, x, xintercept, ylab) {
49	9x	data[[x]][data[[x]] > censor] <- censor
50
51	9x	ggplot2::ggplot(data, ggplot2::aes(x = abs(.data[[x]]), y = "")) +
52	9x	ggplot2::annotate(
53	9x	geom = "rect",
54	9x	ymax = Inf,
55	9x	ymin = -Inf,
56	9x	xmin = -Inf,
57	9x	xmax = 1,
58	9x	alpha = 0.15,
59	9x	fill = color_target
60		) +
61	9x	ggplot2::geom_vline(
62	9x	xintercept = min(c(censor, xintercept)),
63	9x	lty = 3,
64	9x	color = color_outline
65		) +
66	9x	ggplot2::geom_vline(
67	9x	xintercept = censor,
68	9x	lty = 2,
69	9x	color = color_outline
70		) +
71	9x	ggplot2::geom_jitter(width = 0, ggplot2::aes(color = .data$type)) +
72	9x	ggplot2::scale_x_continuous(
73	9x	limits = c(0, censor),
74	9x	sec.axis = ggplot2::sec_axis(
75		~.,
76	9x	breaks = xintercept,
77	9x	labels = signif(xintercept, 2)
78		)
79		) +
80	9x	ggplot2::scale_color_manual(
81	9x	values = c(
82	9x	"fixed" = color_fixed,
83	9x	"indicative" = color_indicative
84		),
85	9x	aesthetics = c("fill", "colour")
86		) +
87	9x	ggplot2::labs(x = NULL, y = ylab, color = NULL) +
88	9x	ggplot2::theme_bw(14) +
89	9x	ggplot2::theme(
90	9x	axis.title.y = ggplot2::element_text(
91	9x	angle = 0,
92	9x	vjust = 0.5,
93	9x	hjust = 1
94		)
95		) +
96	9x	ggplot2::coord_cartesian(clip = "off")
97		}
98
99	1x	if (!is.null(stats_shortterm)) {
100	1x	plots_short <- list(
101	1x	plot_pi_bar(
102	1x	stats_shortterm$by_site,
103	1x	"mqi",
104	1x	stats_shortterm$summary$mqi_90,
105	1x	bquote(MQI[short])
106		) +
107	1x	ggplot2::labs(title = "Performance in Time"),
108	1x	plot_pi_bar(
109	1x	stats_shortterm$by_site,
110	1x	"ti_bias",
111	1x	stats_shortterm$summary$ti_bias_90,
112	1x	bquote(TI[B])
113		),
114	1x	plot_pi_bar(
115	1x	stats_shortterm$by_site,
116	1x	"ti_cor",
117	1x	stats_shortterm$summary$ti_cor_90,
118	1x	bquote(TI[R])
119		),
120	1x	plot_pi_bar(
121	1x	stats_shortterm$by_site,
122	1x	"ti_sd",
123	1x	stats_shortterm$summary$ti_sd_90,
124	1x	bquote(TI[sigma])
125		)
126		)
127		} else {
128	!	plots_short <- NULL
129		}
130
131	1x	if (!is.null(stats_longterm)) {
132	1x	plots_long <- list(
133	1x	plot_pi_bar(
134	1x	stats_longterm$by_site,
135	1x	"mqi",
136	1x	stats_longterm$summary$mqi_90,
137	1x	bquote(MQI[long])
138		) +
139	1x	ggplot2::labs(title = "Performance in Space"),
140	1x	plot_pi_bar(
141	1x	stats_longterm$by_type,
142	1x	"si_rmse",
143	1x	max(stats_longterm$by_type$si_rmse),
144	1x	bquote(SI[RMSE])
145		),
146	1x	plot_pi_bar(
147	1x	stats_longterm$by_type,
148	1x	"si_bias",
149	1x	max(stats_longterm$by_type$si_bias),
150	1x	bquote(SI[B])
151		),
152	1x	plot_pi_bar(
153	1x	stats_longterm$by_type,
154	1x	"si_cor",
155	1x	max(stats_longterm$by_type$si_cor),
156	1x	bquote(SI[R])
157		),
158	1x	plot_pi_bar(
159	1x	stats_longterm$by_type,
160	1x	"si_sd",
161	1x	max(stats_longterm$by_type$si_sd),
162	1x	bquote(SI[sigma])
163		)
164		)
165		} else {
166	!	plots_long <- NULL
167		}
168
169	1x	plts <- list()
170	1x	if (!is.null(plots_short)) {
171	1x	plts <- append(plts, plots_short)
172		}
173	1x	if (!is.null(plots_short) && !is.null(plots_long)) {
174	1x	plts <- append(plts, list(patchwork::plot_spacer()))
175		}
176	1x	if (!is.null(plots_long)) {
177	1x	plts <- append(plts, plots_long)
178		}
179
180	1x	plt <-
181	1x	patchwork::wrap_plots(
182	1x	ncol = 1,
183	1x	plts
184		) +
185	1x	patchwork::plot_layout(guides = "collect", axes = "collect") &
186	1x	ggplot2::theme(
187	1x	legend.position = ifelse(
188	1x	dplyr::n_distinct(stats_shortterm$by_site$type) > 1,
189	1x	"bottom",
190	1x	"none"
191		),
192	1x	legend.justification = "left"
193		)
194
195	1x	return(plt)
196		}
197
198	1x	if (interactive) {
199	1x	add_report_plotly <- function(plt, data, showlegend = FALSE) {
200	2x	plt \|>
201	2x	plotly::add_bars(
202	2x	inherit = FALSE,
203	2x	y = dplyr::distinct(data, .data$name)$name,
204	2x	x = rep(2, dplyr::n_distinct(data$name)),
205	2x	marker = list(line = list(color = color_outline, width = 1)),
206	2x	color = I(grDevices::rgb(0, 0, 0, 0)),
207	2x	hoverinfo = "skip",
208	2x	showlegend = FALSE,
209	2x	colors = c(
210	2x	"fixed" = color_fixed,
211	2x	"indicative" = color_indicative
212		)
213		) \|>
214	2x	plotly::add_bars(
215	2x	inherit = FALSE,
216	2x	y = dplyr::distinct(data, .data$name)$name,
217	2x	x = rep(1, dplyr::n_distinct(data$name)),
218	2x	color = I(col),
219	2x	hoverinfo = "skip",
220	2x	showlegend = FALSE,
221	2x	colors = c(
222	2x	"fixed" = color_fixed,
223	2x	"indicative" = color_indicative
224		)
225		) \|>
226	2x	plotly::add_markers(
227	2x	colors = c(
228	2x	"fixed" = color_fixed,
229	2x	"indicative" = color_indicative
230		),
231	2x	showlegend = showlegend
232		) \|>
233	2x	plotly::layout(
234	2x	barmode = "overlay",
235	2x	xaxis = list(
236	2x	range = c(-0.1, censor),
237	2x	mirror = TRUE,
238	2x	title = ""
239		),
240	2x	yaxis = list(
241	2x	title = "",
242	2x	fixedrange = TRUE
243		),
244	2x	legend = list(
245	2x	orientation = "h",
246	2x	y = 1.1
247		),
248	2x	showlegend = TRUE
249		)
250		}
251
252	1x	col <- t(grDevices::col2rgb(color_target))
253	1x	col <- grDevices::rgb(
254	1x	red = col[1, "red"],
255	1x	green = col[1, "green"],
256	1x	blue = col[1, "blue"],
257	1x	alpha = 255 * 0.15,
258	1x	maxColorValue = 255
259		)
260
261	1x	if (!is.null(stats_shortterm)) {
262	1x	shortdat <-
263	1x	stats_shortterm$by_site \|>
264	1x	dplyr::select(c("site", "type", "mqi", "ti_bias", "ti_cor", "ti_sd")) \|>
265	1x	tidyr::pivot_longer(c("mqi", "ti_bias", "ti_cor", "ti_sd")) \|>
266	1x	dplyr::mutate(
267	1x	name = factor(
268	1x	.data$name,
269	1x	rev(c("mqi", "ti_bias", "ti_cor", "ti_sd")),
270	1x	rev(c(
271	1x	"MQI<sub>short</sub>",
272	1x	"TI<sub>B</sub>",
273	1x	"TI<sub>R</sub>",
274	1x	"TI<sub>SD</sub>"
275		))
276		),
277	1x	value = abs(.data$value),
278	1x	value = ifelse(.data$value > censor, censor, .data$value)
279		)
280
281	1x	plt_short <-
282	1x	shortdat \|>
283	1x	plotly::plot_ly(
284	1x	y = ~name,
285	1x	x = ~ abs(value),
286	1x	color = ~type,
287	1x	colors = c(
288	1x	"fixed" = color_fixed,
289	1x	"indicative" = color_indicative
290		),
291	1x	legendgroup = ~type,
292	1x	text = ~site
293		) \|>
294	1x	add_report_plotly(shortdat, showlegend = TRUE)
295		} else {
296	!	plt_short <- NULL
297		}
298
299	1x	if (!is.null(stats_longterm)) {
300	1x	longdat <-
301	1x	dplyr::bind_rows(
302	1x	dplyr::select(
303	1x	stats_longterm$by_site,
304	1x	c("site", "type", "mqi")
305		) \|>
306	1x	tidyr::pivot_longer("mqi"),
307	1x	tidyr::pivot_longer(
308	1x	data = stats_longterm$by_type,
309	1x	-"type"
310		)
311		) \|>
312	1x	dplyr::mutate(
313	1x	name = factor(
314	1x	.data$name,
315	1x	rev(c("mqi", "si_rmse", "si_bias", "si_cor", "si_sd")),
316	1x	rev(c(
317	1x	"MQI<sub>long</sub>",
318	1x	"SI<sub>RMSE</sub>",
319	1x	"SI<sub>B</sub>",
320	1x	"SI<sub>R</sub>",
321	1x	"SI<sub>SD</sub>"
322		))
323		),
324	1x	value = abs(.data$value),
325	1x	value = ifelse(.data$value > censor, censor, .data$value)
326		)
327
328	1x	plt_long <-
329	1x	longdat \|>
330	1x	plotly::plot_ly(
331	1x	y = ~name,
332	1x	x = ~ abs(value),
333	1x	color = ~type,
334	1x	text = ~site,
335	1x	legendgroup = ~type,
336	1x	showlegend = is.null(stats_shortterm)
337		) \|>
338	1x	add_report_plotly(longdat, showlegend = is.null(stats_shortterm))
339		} else {
340	!	plt_long <- NULL
341		}
342
343	1x	if (!is.null(plt_short) && !is.null(plt_long)) {
344	1x	plt <-
345	1x	plotly::subplot(
346	1x	shareX = TRUE,
347	1x	titleY = TRUE,
348	1x	nrows = 2,
349	1x	plt_short,
350	1x	plt_long,
351	1x	heights = c(4 / 9, 5 / 9)
352		)
353	!	} else if (!is.null(plt_short)) {
354	!	plt <- plt_short
355	!	} else if (!is.null(plt_long)) {
356	!	plt <- plt_long
357		}
358
359	1x	return(plt)
360		}
361		}

1		#' Plot a bar chart comparing observed (measured) and modelled concentrations
2		#'
3		#' This function produces a plot comparing modelled and measured concentrations.
4		#' For long-term data, the provided annual averaged measured data is visualised
5		#' alongside the performance acceptability range of the observations. For
6		#' short-term data, the RMSE and RMSU*0 are visualised.
7		#'
8		#' @param stats The output of [summarise_mqo_stats()]. All relevant information
9		#' (e.g., `term`, `params_fixed`, etc.) will be passed to this function.
10		#'
11		#' @param interactive If `FALSE`, the default, a static `ggplot2` graphic will
12		#' be returned which can be saved as a PNG, SVG, or other similar format. If
13		#' `TRUE`, a dynamic HTML widget will be returned created by `plotly`.
14		#'
15		#' @param color_obs,color_mod,color_outline The colours to use to fill the
16		#' 'observation' bars, 'modelled' bars, and for the outlines/error bars. Can
17		#' be expressed as hex codes, or any colours listed in [colors()].
18		#'
19		#' @family plotting functions
20		#'
21		#' @author Jack Davison
22		#'
23		#' @examples
24		#' plot_comparison_bars(
25		#' summarise_mqo_stats(demo_longterm, pollutant = "PM10")
26		#' )
27		#'
28		#' plot_comparison_bars(
29		#' summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
30		#' )
31		#'
32		#' @export
33		plot_comparison_bars <- function(
34		stats,
35		color_obs = "grey85",
36		color_mod = "black",
37		color_outline = "black",
38		interactive = FALSE
39		) {
40	4x	validate_mqor_stats(stats)
41	4x	if (interactive) {
42	2x	rlang::check_installed("plotly")
43		}
44	4x	term <- stats$inputs$term
45
46	4x	if (term == "long") {
47	2x	plotdata <-
48	2x	stats$by_site \|>
49	2x	tidyr::pivot_longer(cols = c("mod", "obs"), names_to = "stat") \|>
50	2x	dplyr::mutate(
51	2x	ar_low = dplyr::if_else(.data$stat == "mod", NA, .data$ar_low),
52	2x	ar_up = dplyr::if_else(.data$stat == "mod", NA, .data$ar_up),
53	2x	error = dplyr::if_else(
54	2x	.data$stat == "mod",
55	2x	0,
56	2x	.data$ar_up - .data$value
57		)
58		) \|>
59	2x	dplyr::mutate(
60	2x	stat = factor(
61	2x	.data$stat,
62	2x	c("obs", "mod"),
63	2x	c("Observations", "Modelled Values")
64		)
65		)
66
67	2x	if (interactive) {
68		# need to split the data, to overcome known issue in {plotly}
69		# <https://github.com/plotly/plotly.R/issues/762>
70	1x	plotdata_splits <- split(plotdata, plotdata$stat)
71
72	1x	plt <-
73	1x	plotly::plot_ly() \|>
74	1x	plotly::add_trace(
75	1x	type = "bar",
76	1x	x = plotdata_splits$Observations[["site"]],
77	1x	y = plotdata_splits$Observations$value,
78	1x	error_y = list(
79	1x	array = plotdata_splits$Observations$error,
80	1x	color = color_outline
81		),
82	1x	name = "Observations",
83	1x	color = I(color_obs),
84	1x	marker = list(line = list(color = color_outline, width = 1))
85		) \|>
86	1x	plotly::add_trace(
87	1x	type = "bar",
88	1x	x = plotdata_splits$`Modelled Values`[["site"]],
89	1x	y = plotdata_splits$`Modelled Values`$value,
90	1x	name = "Modelled Values",
91	1x	color = I(color_mod),
92	1x	marker = list(line = list(color = color_outline, width = 1))
93		) \|>
94	1x	plotly::layout(
95	1x	xaxis = list(title = "Sampling Points"),
96	1x	yaxis = list(
97	1x	title = "Concentrations (μg m<sup>-3</sup>)",
98	1x	fixedrange = TRUE
99		),
100	1x	legend = list(orientation = "h", y = 1.1),
101	1x	showlegend = TRUE
102		)
103		} else {
104	1x	plt <-
105	1x	ggplot2::ggplot(
106	1x	plotdata,
107	1x	ggplot2::aes(x = .data[["site"]], y = .data$value)
108		) +
109	1x	ggplot2::geom_col(
110	1x	ggplot2::aes(fill = .data$stat),
111	1x	position = ggplot2::position_dodge2(width = 2 / 3),
112	1x	width = 2 / 3,
113	1x	color = color_outline
114		) +
115	1x	ggplot2::geom_linerange(
116	1x	ggplot2::aes(
117	1x	ymax = .data$ar_up,
118	1x	ymin = .data$ar_low,
119	1x	group = .data$stat
120		),
121	1x	na.rm = TRUE,
122	1x	position = ggplot2::position_dodge2(width = 2 / 3)
123		) +
124	1x	ggplot2::theme_bw(14) +
125	1x	ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) +
126	1x	ggplot2::scale_fill_manual(values = c(color_obs, color_mod)) +
127	1x	ggplot2::theme(legend.position = "top", legend.justification = "left") +
128	1x	ggplot2::labs(
129	1x	x = "Sampling Points",
130	1x	y = format_text("Concentrations (ug/m3)"),
131	1x	fill = NULL
132		)
133		}
134		}
135
136	4x	if (term == "short") {
137	2x	plotdata <-
138	2x	stats$by_site \|>
139	2x	dplyr::select(dplyr::all_of(c("site", "rmse", "rmsu_star"))) \|>
140	2x	tidyr::pivot_longer(
141	2x	cols = -"site",
142	2x	names_to = "stat"
143		) \|>
144	2x	dplyr::mutate(
145	2x	stat = factor(.data$stat, c("rmse", "rmsu_star"))
146		)
147
148	2x	if (interactive) {
149	1x	plotdata_splits <- split(plotdata, plotdata$stat)
150
151	1x	plt <-
152	1x	plotly::plot_ly() \|>
153	1x	plotly::add_trace(
154	1x	type = "bar",
155	1x	x = plotdata_splits$rmse[["site"]],
156	1x	y = plotdata_splits$rmse$value,
157	1x	name = "RMSE",
158	1x	color = I(color_obs),
159	1x	marker = list(line = list(color = color_outline, width = 1))
160		) \|>
161	1x	plotly::add_trace(
162	1x	type = "bar",
163	1x	x = plotdata_splits$rmsu_star[["site"]],
164	1x	y = plotdata_splits$rmsu_star$value,
165	1x	name = "RMSU<sub>0</sub><sup>*</sup>",
166	1x	color = I(color_mod),
167	1x	marker = list(line = list(color = color_outline, width = 1))
168		) \|>
169	1x	plotly::layout(
170	1x	xaxis = list(title = "Sampling Points"),
171	1x	yaxis = list(
172	1x	title = "Concentrations (μg m<sup>-3</sup>)",
173	1x	fixedrange = TRUE
174		),
175	1x	legend = list(orientation = "h", y = 1.1),
176	1x	showlegend = TRUE
177		)
178		} else {
179	1x	plt <-
180	1x	ggplot2::ggplot(
181	1x	plotdata,
182	1x	ggplot2::aes(x = .data[["site"]], y = .data$value)
183		) +
184	1x	ggplot2::geom_col(
185	1x	ggplot2::aes(fill = .data$stat),
186	1x	position = ggplot2::position_dodge2(width = 2 / 3),
187	1x	width = 2 / 3,
188	1x	color = color_outline
189		) +
190	1x	ggplot2::theme_bw(14) +
191	1x	ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) +
192	1x	ggplot2::scale_fill_manual(
193	1x	values = c(color_obs, color_mod),
194	1x	labels = c(expression(RMSE), expression(RMSU[O]^"*"))
195		) +
196	1x	ggplot2::theme(legend.position = "top", legend.justification = "left") +
197	1x	ggplot2::labs(
198	1x	x = "Sampling Points",
199	1x	y = format_text("Concentration (ug/m3)"),
200	1x	fill = NULL
201		)
202		}
203		}
204
205	4x	return(plt)
206		}

1		#' Write tables of MQO summary data to a file
2		#'
3		#' This function takes the output of [summarise_mqo_stats()] and writes it to a
4		#' file. Either short-term or long-term data can be written, or both at once.
5		#'
6		#' @inheritParams plot_mqi_report
7		#'
8		#' @inheritParams data.table::fwrite
9		#'
10		#' @param digits Integer indicating the number of decimal places to be used when
11		#' rounding numeric values.
12		#'
13		#' @param file Output file name. `""` indicates output to the console.
14		#'
15		#' @param delim The delimiter used to separate columns in the files. The default
16		#' is `";"`.
17		#'
18		#' @author Jack Davison
19		#'
20		#' @export
21		write_mqo_stats <- function(
22		stats_shortterm = NULL,
23		stats_longterm = NULL,
24		file = "",
25		digits = 2,
26		delim = ";"
27		) {
28	3x	validate_mqor_stats(stats_longterm)
29	3x	validate_mqor_stats(stats_shortterm)
30
31	3x	fmt <- function(x) {
32	14x	stringr::str_replace_all(x, "_", " ") \|>
33	14x	stringr::str_to_title()
34		}
35
36	3x	write_empty_line <- function() {
37	18x	data.table::fwrite(as.list(""), file = file, append = TRUE)
38		}
39
40	3x	if (file.exists(file)) {
41	!	unlink(file, force = TRUE)
42		}
43
44	3x	write_stats <- function(stats) {
45	4x	title <- glue::glue(
46	4x	"{fmt(stats$inputs$term)}-term {stats$inputs$pollutant} "
47		)
48
49	4x	data.table::fwrite(as.list(title), file = file, append = TRUE)
50	4x	write_empty_line()
51
52	4x	data.table::fwrite(as.list("Parameters"), file = file, append = TRUE)
53	4x	dplyr::bind_rows(
54	4x	"Fixed" = stats$inputs$params_fixed,
55	4x	"Indicative" = stats$inputs$params_indicative,
56	4x	.id = "type"
57		) \|>
58	4x	data.table::fwrite(
59	4x	file = file,
60	4x	append = TRUE,
61	4x	col.names = TRUE,
62	4x	row.names = FALSE,
63	4x	sep = delim,
64	4x	quote = FALSE
65		)
66
67	4x	purrr::walk(
68	4x	names(stats)[names(stats) != "inputs"],
69	4x	function(x) {
70	10x	write_empty_line()
71	10x	data.table::fwrite(as.list(fmt(x)), file = file, append = TRUE)
72	10x	suppressWarnings(
73	10x	data.table::fwrite(
74	10x	x = stats[[x]] \|>
75	10x	dplyr::mutate(dplyr::across(dplyr::where(is.numeric), \(x) {
76	72x	round(x, digits = digits)
77		})),
78	10x	file = file,
79	10x	append = TRUE,
80	10x	col.names = TRUE,
81	10x	row.names = FALSE,
82	10x	sep = delim,
83	10x	quote = FALSE
84		)
85		)
86		}
87		)
88
89	4x	write_empty_line()
90		}
91
92	3x	if (!is.null(stats_shortterm)) {
93	2x	write_stats(stats_shortterm)
94		}
95	3x	if (!is.null(stats_longterm)) {
96	2x	write_stats(stats_longterm)
97		}
98
99	3x	return(invisible(file))
100		}

1		#' Quickly plot observed (measured) and modelled concentrations over time in
2		#' short-term data
3		#'
4		#' This is a useful function for examining short-term data after it has been
5		#' filtered or aggregated, but before it statistics are calculated. Unlike
6		#' other plotting functions in `{mqor}`, this function does not take the
7		#' output of [summarise_mqo_stats()].
8		#'
9		#' @param data An R `data.frame` containing at least four columns; a numeric
10		#' column of observed values, a numeric column of modelled values, a character
11		#' or factor column of identifiers that identify the site associated with the
12		#' concentrations, and a character or factor column of identifiers that
13		#' identify the pollutant being measured/modelled. See [demo_shortterm] for an
14		#' example format.
15		#'
16		#' @param pollutant The pollutant to plot. Should be a value contained within
17		#' `data[[dict$pollutant]]`.
18		#'
19		#' @param site The station to plot.s Should be a value contained within
20		#' `data[[dict$site]]`.
21		#'
22		#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary
23		#' to specify the columns in the data `{mqor}` should use.
24		#'
25		#' @param color_obs,color_mod The colours to use for the 'observation' and
26		#' 'modelled' lines. Can be expressed as hex codes, or any colours listed in
27		#' [colors()].
28		#'
29		#' @inheritParams plot_comparison_bars
30		#'
31		#' @family plotting functions
32		#'
33		#' @author Jack Davison
34		#'
35		#' @export
36		plot_timeseries <- function(
37		data,
38		pollutant,
39		site,
40		color_obs = "#6CC5B0",
41		color_mod = "black",
42		dict = mqor::mqo_dict(),
43		interactive = FALSE
44		) {
45	2x	pollutant <- rlang::arg_match(
46	2x	pollutant,
47	2x	unique(as.character(data[[dict$pollutant]]))
48		)
49
50	2x	data <- data[
51	2x	data[[dict$pollutant]] == pollutant & data[[dict$site]] == site,
52		]
53
54	2x	if (nrow(data) == 0L) {
55	!	cli::cli_warn(
56	!	"No data returned; please check {.field pollutant} and {.field site}."
57		)
58	!	return(NULL)
59		}
60
61	2x	plotdata <-
62	2x	tidyr::pivot_longer(
63	2x	data,
64	2x	cols = dplyr::all_of(c(dict$obs, dict$mod)),
65	2x	names_to = "mqor_name",
66	2x	values_to = "mqor_value"
67		)
68
69	2x	if (interactive) {
70	1x	plotdata$mqor_name[plotdata$mqor_name == "obs"] <- "Observations"
71	1x	plotdata$mqor_name[plotdata$mqor_name == "mod"] <- "Modelled Values"
72
73	1x	plt <-
74	1x	plotly::plot_ly() \|>
75	1x	plotly::add_trace(
76	1x	x = plotdata[[dict$date]],
77	1x	y = plotdata$mqor_value,
78	1x	color = plotdata$mqor_name,
79	1x	colors = stats::setNames(
80	1x	c(color_mod, color_obs),
81	1x	c("Modelled Values", "Observations")
82		),
83	1x	type = "scatter",
84	1x	mode = "lines+markers"
85		) \|>
86	1x	plotly::layout(
87	1x	yaxis = list(title = toupper(data[[dict$pollutant]][1])),
88	1x	xaxis = list(title = ""),
89	1x	legend = list(
90	1x	orientation = "h",
91	1x	y = 1.1
92		),
93	1x	hovermode = "x unified"
94		)
95		}
96
97	2x	if (!interactive) {
98	1x	plt <-
99	1x	plotdata \|>
100	1x	ggplot2::ggplot(ggplot2::aes(
101	1x	x = .data[[dict$date]],
102	1x	y = .data$mqor_value
103		)) +
104	1x	ggplot2::geom_line(ggplot2::aes(color = .data$mqor_name)) +
105	1x	ggplot2::geom_point(ggplot2::aes(color = .data$mqor_name)) +
106	1x	ggplot2::theme_bw(14) +
107	1x	ggplot2::theme(legend.position = "top", legend.justification = "left") +
108	1x	ggplot2::labs(
109	1x	x = NULL,
110	1x	y = toupper(data[[dict$pollutant]][1]),
111	1x	color = NULL
112		) +
113	1x	ggplot2::scale_color_manual(
114	1x	values = stats::setNames(
115	1x	c(color_mod, color_obs),
116	1x	c(dict$mod, dict$obs)
117		),
118	1x	breaks = c(dict$obs, dict$mod),
119	1x	labels = c("Observations", "Modelled Values")
120		)
121		}
122
123	2x	return(plt)
124		}

1		#' Create an interactive table of MQO summary data
2		#'
3		#' This function takes the output of [summarise_mqo_stats()] and creates an
4		#' interactive table of the outputs. The output table is nested per object,
5		#'
6		#' @inheritParams plot_comparison_bars
7		#'
8		#' @param expanded Initialise the widget as expanded? Defaults to `TRUE`. Passed
9		#' to [reactable::reactable()]'s `defaultExpanded` argument.
10		#'
11		#' @param color_pass,color_fail,color_fixed,color_indicative These colours are
12		#' used to colour various cells in the output. Can be expressed as hex codes,
13		#' or any colours listed in [colors()].
14		#'
15		#' @returns a `reactable::reactable()` object
16		#'
17		#' @examples
18		#' tabulate_mqo_stats(
19		#' summarise_mqo_stats(demo_longterm, pollutant = "PM10")
20		#' )
21		#'
22		#' tabulate_mqo_stats(
23		#' summarise_mqo_stats(demo_shortterm, pollutant = "PM10")
24		#' )
25		#'
26		#' @export
27		tabulate_mqo_stats <- function(
28		stats,
29		expanded = TRUE,
30		color_pass = "#136F63",
31		color_fail = "#840032",
32		color_fixed = "#4269D0",
33		color_indicative = "#EFB118"
34		) {
35	2x	validate_mqor_stats(stats)
36	2x	rlang::check_installed("reactable")
37
38	2x	style_lt_one <- make_style_lt_one(
39	2x	color_pass = color_pass,
40	2x	color_fail = color_fail
41		)
42
43	2x	style_type <- make_style_type(
44	2x	color_fixed = color_fixed,
45	2x	color_indicative = color_indicative
46		)
47
48	2x	tblname <- glue::glue(
49	2x	"MQO Statistics: {stats$inputs$pollutant}, {stats$inputs$term} term"
50		)
51
52	2x	term <- stats$inputs$term
53
54	2x	if (term == "short") {
55	1x	tbl <- dplyr::tibble(
56	1x	x = c("Overall Summary", "Stats by Site", "Parameters")
57		) \|>
58	1x	stats::setNames(tblname) \|>
59	1x	reactable::reactable(
60	1x	defaultExpanded = expanded,
61	1x	sortable = FALSE,
62	1x	searchable = FALSE,
63	1x	filterable = FALSE,
64	1x	details = function(index) {
65	3x	if (index == 1) {
66	1x	tabulate_short_summary(stats$summary, style_lt_one = style_lt_one)
67	2x	} else if (index == 2) {
68	1x	tabulate_short_by_site(
69	1x	stats$by_site,
70	1x	style_lt_one = style_lt_one,
71	1x	style_type = style_type
72		)
73	1x	} else if (index == 3) {
74	1x	tabulate_inputs(
75	1x	stats$inputs,
76	1x	term = term
77		)
78		}
79		}
80		)
81		}
82
83	2x	if (term == "long") {
84	1x	tbl <-
85	1x	dplyr::tibble(
86	1x	x = c("Overall Summary", "Stats by Type", "Stats by Site", "Parameters")
87		) \|>
88	1x	stats::setNames(tblname) \|>
89	1x	reactable::reactable(
90	1x	defaultExpanded = expanded,
91	1x	sortable = FALSE,
92	1x	searchable = FALSE,
93	1x	filterable = FALSE,
94	1x	details = function(index) {
95	4x	if (index == 1) {
96	1x	tabulate_long_summary(stats$summary, style_lt_one = style_lt_one)
97	3x	} else if (index == 2) {
98	1x	tabulate_long_by_type(
99	1x	stats$by_type,
100	1x	style_lt_one = style_lt_one,
101	1x	style_type = style_type
102		)
103	2x	} else if (index == 3) {
104	1x	tabulate_long_by_site(
105	1x	stats$by_site,
106	1x	style_lt_one = style_lt_one,
107	1x	style_type = style_type
108		)
109	1x	} else if (index == 4) {
110	1x	tabulate_inputs(stats$inputs, term = term)
111		}
112		}
113		)
114		}
115	2x	return(tbl)
116		}
117
118		#' @noRd
119		tabulate_short_summary <- function(data, style_lt_one) {
120	1x	reactable::reactable(
121	1x	dplyr::relocate(data, "mqo"),
122	1x	outlined = TRUE,
123	1x	defaultColDef = reactable::colDef(
124	1x	html = TRUE,
125	1x	format = reactable::colFormat(digits = 2)
126		),
127	1x	pagination = FALSE,
128	1x	columns = list(
129	1x	mqo = reactable::colDef(
130	1x	"MQO",
131	1x	align = "right",
132	1x	cell = function(value) {
133	1x	ifelse(value, paste("Pass ", svg_check), paste("Fail ", svg_cross))
134		},
135	1x	style = style_lt_one
136		),
137	1x	mqi_90 = reactable::colDef(
138	1x	"MQI<sub>90th</sub>",
139	1x	cell = cell_lt_one,
140	1x	style = style_lt_one
141		),
142	1x	ti_bias_90 = reactable::colDef(
143	1x	"TI<sub>B,90th</sub>",
144	1x	cell = cell_lt_one,
145	1x	style = style_lt_one
146		),
147	1x	ti_cor_90 = reactable::colDef(
148	1x	"TI<sub>R,90th</sub>",
149	1x	cell = cell_lt_one,
150	1x	style = style_lt_one
151		),
152	1x	ti_sd_90 = reactable::colDef(
153	1x	"TI<sub>σ,90th</sub>",
154	1x	cell = cell_lt_one,
155	1x	style = style_lt_one
156		)
157		),
158	1x	columnGroups = list(
159	1x	reactable::colGroup(
160	1x	name = "Temporal Indicators",
161	1x	columns = c("ti_bias_90", "ti_cor_90", "ti_sd_90")
162		)
163		)
164		)
165		}
166
167		#' @noRd
168		tabulate_short_by_site <- function(data, style_lt_one, style_type) {
169	1x	reactable::reactable(
170	1x	dplyr::relocate(data, dplyr::starts_with("sd_"), .after = "mean_mod"),
171	1x	outlined = TRUE,
172	1x	defaultColDef = reactable::colDef(
173	1x	html = TRUE,
174	1x	align = "right",
175	1x	vAlign = "center",
176	1x	format = reactable::colFormat(digits = 2)
177		),
178	1x	pagination = FALSE,
179	1x	columns = list(
180	1x	site = reactable::colDef("Site", align = "right"),
181	1x	type = reactable::colDef(
182	1x	"Type",
183	1x	align = "right",
184	1x	cell = cell_type,
185	1x	style = style_type
186		),
187	1x	mean_obs = reactable::colDef(
188	1x	'<span style="text-decoration: overline;">O</span>'
189		),
190	1x	mean_mod = reactable::colDef(
191	1x	'<span style="text-decoration: overline;">M</span>'
192		),
193	1x	bias = reactable::colDef("BIAS"),
194	1x	rmse = reactable::colDef("RMSE"),
195	1x	rmsu = reactable::colDef("RMSU"),
196	1x	rmsu_star = reactable::colDef("RMSU*"),
197	1x	r = reactable::colDef("COR."),
198	1x	sd_mod = reactable::colDef("σ<sub>M</sub>"),
199	1x	sd_obs = reactable::colDef("σ<sub>O</sub>"),
200	1x	crmse = reactable::colDef("CRMSE"),
201	1x	mqi = reactable::colDef("MQI", cell = cell_lt_one, style = style_lt_one),
202	1x	ti_bias = reactable::colDef(
203	1x	"TI<sub>B</sub>",
204	1x	cell = cell_lt_one,
205	1x	style = style_lt_one
206		),
207	1x	ti_cor = reactable::colDef(
208	1x	"TI<sub>R</sub>",
209	1x	cell = cell_lt_one,
210	1x	style = style_lt_one
211		),
212	1x	ti_sd = reactable::colDef(
213	1x	"TI<sub>σ</sub>",
214	1x	cell = cell_lt_one,
215	1x	style = style_lt_one
216		),
217	1x	ti_crmse = reactable::colDef(
218	1x	"TI<sub>CRMSE</sub>",
219	1x	cell = cell_lt_one,
220	1x	style = style_lt_one
221		)
222		),
223	1x	columnGroups = list(
224	1x	reactable::colGroup(
225	1x	name = "Temporal Indicators",
226	1x	columns = c("ti_bias", "ti_cor", "ti_sd", "ti_crmse")
227		)
228		)
229		)
230		}
231
232		#' @noRd
233		tabulate_long_summary <- function(data, style_lt_one) {
234	1x	reactable::reactable(
235	1x	dplyr::relocate(data, "mqo", "mqi_90"),
236	1x	outlined = TRUE,
237	1x	defaultColDef = reactable::colDef(
238	1x	html = TRUE,
239	1x	format = reactable::colFormat(digits = 2)
240		),
241	1x	pagination = FALSE,
242	1x	columns = list(
243	1x	mqo = reactable::colDef(
244	1x	"MQO",
245	1x	align = "right",
246	1x	cell = function(value) {
247	1x	ifelse(value, paste("Pass ", svg_check), paste("Fail ", svg_cross))
248		},
249	1x	style = style_lt_one
250		),
251	1x	mqi_90 = reactable::colDef(
252	1x	"MQI<sub>90th</sub>",
253	1x	cell = cell_lt_one,
254	1x	style = style_lt_one
255		),
256	1x	bias = reactable::colDef("BIAS"),
257	1x	rmse = reactable::colDef("RMSE"),
258	1x	r = reactable::colDef("COR."),
259	1x	sd_mod = reactable::colDef("σ<sub>M</sub>"),
260	1x	sd_obs = reactable::colDef("σ<sub>O</sub>"),
261	1x	rmsu = reactable::colDef("RMSU")
262		)
263		)
264		}
265
266		#' @noRd
267		tabulate_long_by_type <- function(data, style_lt_one, style_type) {
268	1x	reactable::reactable(
269	1x	data,
270	1x	outlined = TRUE,
271	1x	defaultColDef = reactable::colDef(
272	1x	html = TRUE,
273	1x	format = reactable::colFormat(digits = 2)
274		),
275	1x	pagination = FALSE,
276	1x	columns = list(
277	1x	type = reactable::colDef(
278	1x	"Type",
279	1x	align = "right",
280	1x	cell = cell_type,
281	1x	style = style_type
282		),
283	1x	si_bias = reactable::colDef(
284	1x	"SI<sub>B</sub>",
285	1x	cell = cell_lt_one,
286	1x	style = style_lt_one
287		),
288	1x	si_cor = reactable::colDef(
289	1x	"SI<sub>R</sub>",
290	1x	cell = cell_lt_one,
291	1x	style = style_lt_one
292		),
293	1x	si_sd = reactable::colDef(
294	1x	"SI<sub>σ</sub>",
295	1x	cell = cell_lt_one,
296	1x	style = style_lt_one
297		),
298	1x	si_rmse = reactable::colDef(
299	1x	"SI<sub>RMSE</sub>",
300	1x	cell = cell_lt_one,
301	1x	style = style_lt_one
302		)
303		),
304	1x	columnGroups = list(
305	1x	reactable::colGroup(
306	1x	name = "Spatial Indicators",
307	1x	columns = c("si_bias", "si_cor", "si_sd", "si_rmse")
308		)
309		)
310		)
311		}
312
313		#' @noRd
314		tabulate_long_by_site <- function(data, style_lt_one, style_type) {
315	1x	reactable::reactable(
316	1x	dplyr::relocate(data, "mqi", .after = "type"),
317	1x	outlined = TRUE,
318	1x	defaultColDef = reactable::colDef(
319	1x	html = TRUE,
320	1x	format = reactable::colFormat(digits = 2)
321		),
322	1x	pagination = FALSE,
323	1x	columns = list(
324	1x	site = reactable::colDef("Site", align = "right"),
325	1x	type = reactable::colDef(
326	1x	"Type",
327	1x	align = "right",
328	1x	cell = cell_type,
329	1x	style = style_type
330		),
331	1x	obs = reactable::colDef(
332	1x	'<span style="text-decoration: overline;">O</span>'
333		),
334	1x	mod = reactable::colDef(
335	1x	'<span style="text-decoration: overline;">M</span>'
336		),
337	1x	uncer = reactable::colDef(
338	1x	"U(ō)"
339		),
340	1x	ar_low = reactable::colDef(
341	1x	"AR<sub>low</sub>"
342		),
343	1x	ar_up = reactable::colDef(
344	1x	"AR<sub>up</sub>"
345		),
346	1x	mqi = reactable::colDef("MQI", cell = cell_lt_one, style = style_lt_one)
347		)
348		)
349		}
350
351		#' @noRd
352		tabulate_inputs <- function(data, term) {
353	2x	tibble::tibble(term = data$term, pollutant = data$pollutant) \|>
354	2x	dplyr::bind_cols(
355	2x	data$params_fixed \|>
356	2x	dplyr::rename_with(.fn = \(x) paste0("f_", x))
357		) \|>
358	2x	dplyr::bind_cols(
359	2x	data$params_indicative \|>
360	2x	dplyr::rename_with(.fn = \(x) paste0("i_", x))
361		) \|>
362	2x	dplyr::select(-"i_rv") \|>
363	2x	reactable::reactable(
364	2x	outlined = TRUE,
365	2x	filterable = FALSE,
366	2x	searchable = FALSE,
367	2x	pagination = FALSE,
368	2x	defaultColDef = reactable::colDef(
369	2x	align = "right",
370	2x	html = TRUE
371		),
372	2x	columns = list(
373	2x	term = reactable::colDef("Term", align = "right"),
374	2x	pollutant = reactable::colDef("Pollutant", align = "right"),
375	2x	f_rv = reactable::colDef(glue::glue("RV<sub>{term}</sub>")),
376	2x	f_u_rv = reactable::colDef(glue::glue(
377	2x	"U<sub>0r</sub>(RV<sub>{term}</sub>)"
378		)),
379	2x	f_a = reactable::colDef(glue::glue("α<sub>{term}</sub>")),
380	2x	f_b = reactable::colDef(glue::glue("β<sub>{term}</sub>")),
381	2x	i_u_rv = reactable::colDef(glue::glue(
382	2x	"U<sub>0r</sub>(RV<sub>{term}</sub>)"
383		)),
384	2x	i_a = reactable::colDef(glue::glue("α<sub>{term}</sub>")),
385	2x	i_b = reactable::colDef(glue::glue("β<sub>{term}</sub>"))
386		),
387	2x	columnGroups = list(
388	2x	reactable::colGroup(
389	2x	"Fixed",
390	2x	align = "right",
391	2x	columns = c("f_u_rv", "f_a", "f_b"),
392		),
393	2x	reactable::colGroup(
394	2x	"Indicative",
395	2x	align = "right",
396	2x	columns = c("i_u_rv", "i_a", "i_b")
397		)
398		)
399		)
400		}
401
402		#' @noRd
403		cell_lt_one <- function(value) {
404	99x	ifelse(
405	99x	value <= 1,
406	99x	paste(round(value, 2), svg_check),
407	99x	paste(round(value, 2), svg_cross)
408		)
409		}
410
411		#' @noRd
412		make_style_lt_one <- function(color_pass, color_fail) {
413	2x	function(value) {
414	101x	if (is.logical(value)) {
415	2x	color <- ifelse(value, color_pass, color_fail)
416		} else {
417	99x	color <- ifelse(value <= 1, color_pass, color_fail)
418		}
419	101x	list(color = color, fontWeight = "bold")
420		}
421		}
422
423		#' @noRd
424		cell_type <- function(value) {
425	31x	value <- stringr::str_to_title(value)
426	31x	ifelse(value == "Fixed", paste(value, svg_building), paste(value, svg_box))
427		}
428
429		#' @noRd
430		make_style_type <- function(color_fixed, color_indicative) {
431	2x	function(value) {
432	31x	color <- ifelse(value == "fixed", color_fixed, color_indicative)
433	31x	list(color = color)
434		}
435		}
436
437		svg_check <-
438		"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-check-circle \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z\"></path>\n<path d=\"M10.97 4.97a.235.235 0 0 0-.02.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-1.071-1.05z\"></path></svg>"
439
440		svg_cross <-
441		"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-x-circle \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z\"></path>\n<path d=\"M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z\"></path></svg>"
442
443		svg_building <-
444		"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-building \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M4 2.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3 0a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1ZM4 5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1ZM7.5 5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Zm2.5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1ZM4.5 8a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Zm2.5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Z\"></path>\n<path d=\"M2 1a1 1 0 0 1 1-1h10a1 1 0 0 1 1 1v14a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V1Zm11 0H3v14h3v-2.5a.5.5 0 0 1 .5-.5h3a.5.5 0 0 1 .5.5V15h3V1Z\"></path></svg>"
445
446		svg_box <-
447		"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-box \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8.186 1.113a.5.5 0 0 0-.372 0L1.846 3.5 8 5.961 14.154 3.5 8.186 1.113zM15 4.239l-6.5 2.6v7.922l6.5-2.6V4.24zM7.5 14.762V6.838L1 4.239v7.923l6.5 2.6zM7.443.184a1.5 1.5 0 0 1 1.114 0l7.129 2.852A.5.5 0 0 1 16 3.5v8.662a1 1 0 0 1-.629.928l-7.185 2.874a.5.5 0 0 1-.372 0L.63 13.09a1 1 0 0 1-.63-.928V3.5a.5.5 0 0 1 .314-.464L7.443.184z\"></path></svg>"

1		#' Filter a dataset by temporal characteristics
2		#'
3		#' These functions filter a dataset using features of the "date" column, as
4		#' defined by [mqo_dict()]. Current options include the year, the month of the
5		#' year, the day of the week, and the hour of the day. These options allow users
6		#' to flexibly filter their data; for example, returning only weekday evenings
7		#' in Summer months. All but [filter_year()] require short-term data;
8		#' [filter_year()] will work out whether the data is short- or long-term based
9		#' on the data type of the "date" column.
10		#'
11		#' @inheritParams summarise_mqo_stats
12		#'
13		#' @param years,months,wdays,hours A numeric vector indicating the years (any
14		#' intger), months of the year (`1` to `12`), days of the week (`1` to `7`,
15		#' where `1` is Sunday), or hours of the day (`1` to `24`) to retain in the
16		#' data.
17		#'
18		#' @family data utilities
19		#'
20		#' @rdname filter-time
21		#' @export
22		filter_year <- function(data, years, dict = mqor::mqo_dict()) {
23	1x	if (is.numeric(data[[dict$date]])) {
24	!	id <- data[[dict$date]] %in% years
25		} else {
26	1x	id <- lubridate::year(data[[dict$date]]) %in% years
27		}
28	1x	return(data[id, ])
29		}
30
31		#' @rdname filter-time
32		#' @export
33		filter_month <- function(data, months = 1:12, dict = mqor::mqo_dict()) {
34	2x	validate_filter_inputs(months, 1:12, "months")
35	1x	id <- lubridate::month(data[[dict$date]]) %in% months
36	1x	return(data[id, ])
37		}
38
39		#' @rdname filter-time
40		#' @export
41		filter_wday <- function(data, wdays = 1:7, dict = mqor::mqo_dict()) {
42	1x	validate_filter_inputs(wdays, 1:7, "wdays")
43	1x	id <- lubridate::wday(data[[dict$date]], week_start = 7) %in% wdays
44	1x	return(data[id, ])
45		}
46
47		#' @rdname filter-time
48		#' @export
49		filter_hour <- function(data, hours = 0:23, dict = mqor::mqo_dict()) {
50	1x	validate_filter_inputs(hours, 0:23, "hours")
51	1x	id <- lubridate::hour(data[[dict$date]]) %in% hours
52	1x	return(data[id, ])
53		}
54
55		#' @noRd
56		validate_filter_inputs <- function(x, allowed, field) {
57	4x	error <- x[!x %in% allowed]
58	4x	if (length(error) > 0 \|\| !rlang::is_integerish(x)) {
59	1x	cli::cli_abort(c(
60	1x	"{.field {field}} should only cover the integer values {min{allowed}} to {max{allowed}}.",
61	1x	"Bad values: {error}"
62		))
63		}
64		}

1		#' Get path to `{mqor}` example files
2		#'
3		#' `{mqor}` comes bundled with some sample datasets in its `inst/demo_files`
4		#' directory. These are identical to [demo_shortterm] and [demo_longterm], and
5		#' can be read using [read_mqor()]. This function makes these files easy to
6		#' access.
7		#'
8		#' @param file Path to file. If `NULL`, the example files will be listed.
9		#' @seealso [demo_shortterm], [download_demo_files()]
10		#' @export
11		#' @examples
12		#' demo_files()
13		#' demo_files("demo_longterm.csv")
14		demo_files <- function(file = NULL) {
15	4x	opts <- dir(system.file("demo_files", package = "mqor"))
16	4x	if (is.null(file)) {
17	1x	return(opts)
18		} else {
19	3x	file <- rlang::arg_match(file, opts, multiple = FALSE)
20	3x	system.file("demo_files", file, package = "mqor", mustWork = TRUE)
21		}
22		}
23
24		#' Download larger example files from GitLab
25		#'
26		#' As well as the simple files found via [demo_files()], `{mqor}` provides more
27		#' complex data which is too large to bundle with an R package. This function
28		#' downloads these files to a directory. These files are the demo dataset found
29		#' in the DELTA tool, reformatted for [read_mqor()].
30		#'
31		#' @param dir The destination directory for the files.
32		#'
33		#' @param files A vector for the specific files to download.
34		#'
35		#' @seealso [demo_files()]
36		#'
37		#' @export
38		download_demo_files <- function(
39		dir,
40		files = c("delta_attributes.csv", "delta_shortterm.csv", "delta_longterm.csv")
41		) {
42	!	urlstr <- "https://code.europa.eu/jrcairqualitymodelling/mqor/-/raw/main/data-delta/FILE?ref_type=heads"
43
44	!	files <- rlang::arg_match(files, multiple = TRUE)
45
46	!	if (!dir.exists(dir)) {
47	!	dir.create(dir)
48		}
49	!	for (i in files) {
50	!	utils::download.file(
51	!	url = gsub("FILE", i, urlstr),
52	!	destfile = file.path(dir, i)
53		)
54		}
55		}
56
57		#' Simple demonstration datasets taken from the CEN technical specification
58		#'
59		#' These two datasets contain dummy data outlined in Annex G of *Ambient Air —
60		#' Definition and use of modelling quality objectives for air quality
61		#' assessment*. These data are mostly useful for demonstrating `mqor`
62		#' functionality for teaching and learning purposes. `demo_shortterm` should be
63		#' used with `mqo_params(term = "short")` and `demo_longterm` with
64		#' `mqo_params(term = "long")`. Note that `demo_shortterm` only has 5 rows of
65		#' data for expedience; real datasets should have periodic data representing a
66		#' whole year at least.
67		#'
68		#' \describe{
69		#' \item{site}{Site identifiers; S1 through S15.}
70		#' \item{type}{Site type; all are 'fixed' butcould also be 'indicative'.}
71		#' \item{param}{The pollutant (in this case "PM10")}
72		#' \item{date}{The datetime of the measurement. An integer in `demo_longterm`, and a Date in `demo_shortterm`.}
73		#' \item{obs}{Observed (measured) values.}
74		#' \item{mod}{Modelled values.}
75		#' }
76		#'
77		#' @rdname demo-data
78		#' @seealso [demo_files()]
79		#' @order 1
80		#' @source Ambient Air — Definition and use of modelling quality objectives for
81		#' air quality assessment (TC 264 WI 00264248:2025(E))
82		#'
83		#' @examples
84		#' demo_shortterm
85		"demo_shortterm"
86
87		#' @rdname demo-data
88		#' @order 2
89		#'
90		#' @examples
91		#' demo_longterm
92		"demo_longterm"

1		#' Format DELTA-formatted files in a way needed by `{mqor}`
2		#'
3		#' This function ingests monitoring and modeling data, as well as site metadata
4		#' contained within the `startup.ini` file, and returns a `data.frame` in an
5		#' appropriate format for other `{mqor}` functions (e.g.,
6		#' [summarise_mqo_stats()]).
7		#'
8		#' @param monitoring A `data.frame` imported using [read_delta_data_delim()]
9		#' containing monitoring data.
10		#'
11		#' @param modeling One or more `data.frames` imported using
12		#' [read_delta_data_delim()] or [read_delta_data_cdf()] containing modeling
13		#' data. Multiple objects should be provided as a list.
14		#'
15		#' @param startup The object returned by [read_delta_resource()] when `file =
16		#' "startup_*.ini"`
17		#'
18		#' @param data_type Whether to label the DELTA input data as representing
19		#' `"fixed"` or `"indicative"` measurements. Defaults to `"fixed"`.
20		#'
21		#' @export
22		fmt_delta_for_mqor <- function(
23		monitoring,
24		modeling,
25		startup,
26		data_type = c("fixed", "indicative")
27		) {
28	!	data_type <- rlang::arg_match(data_type)
29
30	!	joined <-
31	!	dplyr::left_join(
32	!	monitoring,
33	!	dplyr::bind_rows(modeling),
34	!	dplyr::join_by("date", "site", "param")
35		)
36
37	!	meta <- startup$monitoring
38	!	names(meta)[names(meta) == "station_name"] <- "site"
39
40	!	joined <-
41	!	dplyr::left_join(joined, meta, dplyr::join_by("site")) \|>
42	!	dplyr::mutate(type = data_type, .after = "site")
43
44	!	return(joined)
45		}

1		#' Launch the `{mqor}` Shiny Interface
2		#'
3		#' This app runs [shiny::shinyAppDir()] to launching a web interface built into
4		#' `{mqor}` which allows users to use certain functions without needing to write
5		#' any R scripts.
6		#'
7		#' @inheritParams shiny::shinyAppDir
8		#'
9		#' @author Jack Davison
10		#'
11		#' @export
12		launch_app <- function(options = list()) {
13	!	rlang::check_installed(c(
14	!	"shiny",
15	!	"bslib",
16	!	"plotly",
17	!	"leaflet",
18	!	"rlang",
19	!	"reactable"
20		))
21	!	shiny::shinyAppDir(
22	!	appDir = system.file("shiny", package = "mqor"),
23	!	options = options
24		)
25		}