1 |
#' Quickly summarise observed (measured) and modelled concentrations for MQO |
|
2 |
#' evaluation |
|
3 |
#' |
|
4 |
#' This function calculates all of the model quality objective indictors (e.g., |
|
5 |
#' [vec_mqi()]) *and* all of the complementary indicators (e.g., |
|
6 |
#' [vec_pi_bias()]) for a given dataset. Further relevant summaries are also |
|
7 |
#' calculated; for example, the 90th percentile of `mqi` values, `mqi_90`, is |
|
8 |
#' calculated using [mqo_percentile()]. Two or three datasets are returned; an |
|
9 |
#' overall `summary` dataset, a `by_site` site-wise summary, and - for long-term |
|
10 |
#' data - a `by_type` split fixed/indicative summary. |
|
11 |
#' |
|
12 |
#' @param data An R `data.frame` containing at least five columns; a numeric |
|
13 |
#' column of observed values, a numeric column of modelled values, a character |
|
14 |
#' or factor column of identifiers that identify the site associated with the |
|
15 |
#' concentrations, a character or factor column of identifiers that identify |
|
16 |
#' the pollutant being measured/modelled, and a character or factor column |
|
17 |
#' containing just `"fixed"` or `"indicative"` to label each site. See |
|
18 |
#' [demo_shortterm] for an example format. |
|
19 |
#' |
|
20 |
#' @param pollutant The pollutant to calculate model quality objectives for. |
|
21 |
#' Should be a value contained within `data[[dict$pollutant]]`. |
|
22 |
#' |
|
23 |
#' @param term Either `"short"` or `"long"`, identifying whether `data` |
|
24 |
#' represents Short- or Long-term data. The default, `NULL`, will assume this |
|
25 |
#' from the `data` by checking whether `data[[dict$site]]` contains duplicate |
|
26 |
#' values. |
|
27 |
#' |
|
28 |
#' @param params_fixed,params_indicative See [mqo_params()] for more |
|
29 |
#' information. Defines fixed and indicative parameters. By default, `{mqor}` |
|
30 |
#' attempts to sample appropriate values from [default_params] using `term` |
|
31 |
#' and `pollutant`. |
|
32 |
#' |
|
33 |
#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary |
|
34 |
#' to specify the columns in the data `{mqor}` should use. |
|
35 |
#' |
|
36 |
#' @param table By default this function returns a list of different data |
|
37 |
#' objects. `table` allows a user to select just one. Can be one of |
|
38 |
#' `"summary"`, `"by_type"`, `"by_site"` or `"inputs"`. Note that, if `table` |
|
39 |
#' is provided, the output cannot be then fed into [plot_mqi_scatter()] and |
|
40 |
#' similar functions. |
|
41 |
#' |
|
42 |
#' @returns a list with at most the objects `summary` showing whole data |
|
43 |
#' summaries (e.g., `mqi_90`), `by_type` which shows per-fixed/indicative |
|
44 |
#' values, `by_site` which shows per-site values (e.g., `mqi`), and `inputs` |
|
45 |
#' which give the input parameters. The columns in each vary depending on the |
|
46 |
#' value of `term`. Just one of these can be selected using the `table` |
|
47 |
#' argument. |
|
48 |
#' |
|
49 |
#' @family model performance indicators |
|
50 |
#' |
|
51 |
#' @examples |
|
52 |
#' summarise_mqo_stats( |
|
53 |
#' demo_longterm, |
|
54 |
#' pollutant = "PM10" |
|
55 |
#' ) |
|
56 |
#' |
|
57 |
#' summarise_mqo_stats( |
|
58 |
#' demo_shortterm, |
|
59 |
#' pollutant = "PM10" |
|
60 |
#' ) |
|
61 |
#' |
|
62 |
#' @author Jack Davison |
|
63 |
#' |
|
64 |
#' @export |
|
65 |
summarise_mqo_stats <- function( |
|
66 |
data, |
|
67 |
pollutant, |
|
68 |
term = NULL, |
|
69 |
params_fixed = NULL, |
|
70 |
params_indicative = NULL, |
|
71 |
dict = mqor::mqo_dict(), |
|
72 |
table = NULL |
|
73 |
) { |
|
74 | 6x |
pollutant <- rlang::arg_match( |
75 | 6x |
pollutant, |
76 | 6x |
unique(as.character(data[[dict$pollutant]])) |
77 |
) |
|
78 | 6x |
data <- data[data[[dict$pollutant]] == pollutant, ] |
79 | ||
80 | 6x |
data <- |
81 | 6x |
dplyr::filter( |
82 | 6x |
data, |
83 | 6x |
dplyr::if_else( |
84 | 6x |
all(is.na(.data[[dict$obs]])) | all(is.na(.data[[dict$mod]])), |
85 | 6x |
true = FALSE, |
86 | 6x |
false = TRUE |
87 |
), |
|
88 | 6x |
.by = c(dict$site) |
89 |
) |
|
90 | ||
91 | 6x |
term <- determine_term(data, term, dict) |
92 | ||
93 | 6x |
if (missing(params_fixed)) { |
94 | 6x |
res <- determine_resolution(data, term, dict) |
95 | ||
96 | 6x |
params_fixed <- |
97 | 6x |
mqo_params_default( |
98 | 6x |
term = term, |
99 | 6x |
type = "fixed", |
100 | 6x |
pollutant = pollutant, |
101 | 6x |
resolution = res |
102 |
) |
|
103 |
} |
|
104 | ||
105 | 6x |
if (missing(params_indicative)) { |
106 | 6x |
res <- determine_resolution(data, term, dict) |
107 | ||
108 | 6x |
params_indicative <- |
109 | 6x |
mqo_params_default( |
110 | 6x |
term = term, |
111 | 6x |
type = "indicative", |
112 | 6x |
pollutant = pollutant, |
113 | 6x |
resolution = res |
114 |
) |
|
115 |
} |
|
116 | ||
117 | 6x |
data_splits <- split(data, data[[dict$type]]) |
118 | ||
119 | 6x |
if (term == "short") { |
120 | 3x |
site_stats <- calculate_split_stats( |
121 | 3x |
fun = calc_shortterm_site_stats, |
122 | 3x |
data = data, |
123 | 3x |
data_splits = data_splits, |
124 | 3x |
term = term, |
125 | 3x |
params_fixed = params_fixed, |
126 | 3x |
params_indicative = params_indicative, |
127 | 3x |
obs = dict$obs, |
128 | 3x |
mod = dict$mod, |
129 | 3x |
site = dict$site, |
130 | 3x |
type = dict$type |
131 |
) |
|
132 | ||
133 | 3x |
group_stats <- |
134 | 3x |
site_stats |> |
135 | 3x |
dplyr::summarise( |
136 | 3x |
mqi_90 = mqo_percentile(.data$mqi, na.rm = TRUE), |
137 | 3x |
ti_bias_90 = mqo_percentile(abs(.data$ti_bias), na.rm = TRUE), |
138 | 3x |
ti_cor_90 = mqo_percentile(abs(.data$ti_cor), na.rm = TRUE), |
139 | 3x |
ti_sd_90 = mqo_percentile(abs(.data$ti_sd), na.rm = TRUE) |
140 |
) |> |
|
141 | 3x |
dplyr::mutate(mqo = .data$mqi_90 <= 1, .after = "mqi_90") |
142 |
} |
|
143 | ||
144 | 6x |
if (term == "long") { |
145 | 3x |
site_stats <- calculate_split_stats( |
146 | 3x |
fun = calc_longterm_site_stats, |
147 | 3x |
data = data, |
148 | 3x |
data_splits = data_splits, |
149 | 3x |
term = term, |
150 | 3x |
params_fixed = params_fixed, |
151 | 3x |
params_indicative = params_indicative, |
152 | 3x |
obs = dict$obs, |
153 | 3x |
mod = dict$mod, |
154 | 3x |
site = dict$site, |
155 | 3x |
type = dict$type |
156 |
) |
|
157 | ||
158 | 3x |
type_stats <- calculate_split_stats( |
159 | 3x |
fun = calc_longterm_type_stats, |
160 | 3x |
data = data, |
161 | 3x |
data_splits = data_splits, |
162 | 3x |
term = term, |
163 | 3x |
params_fixed = params_fixed, |
164 | 3x |
params_indicative = params_indicative, |
165 | 3x |
obs = dict$obs, |
166 | 3x |
mod = dict$mod, |
167 | 3x |
site = dict$site, |
168 | 3x |
type = dict$type |
169 |
) |
|
170 | ||
171 | 3x |
group_stats <- |
172 | 3x |
dplyr::summarise( |
173 | 3x |
data, |
174 | 3x |
bias = vec_bias( |
175 | 3x |
obs = .data[[dict$obs]], |
176 | 3x |
mod = .data[[dict$mod]], |
177 | 3x |
na.rm = TRUE |
178 |
), |
|
179 | 3x |
rmse = vec_rmse( |
180 | 3x |
obs = .data[[dict$obs]], |
181 | 3x |
mod = .data[[dict$mod]], |
182 | 3x |
na.rm = TRUE |
183 |
), |
|
184 | 3x |
r = vec_cor( |
185 | 3x |
obs = .data[[dict$obs]], |
186 | 3x |
mod = .data[[dict$mod]], |
187 | 3x |
na.rm = TRUE |
188 |
), |
|
189 | 3x |
sd_obs = vec_sd( |
190 | 3x |
x = .data[[dict$obs]], |
191 | 3x |
term = term, |
192 | 3x |
na.rm = TRUE |
193 |
), |
|
194 | 3x |
sd_mod = vec_sd( |
195 | 3x |
x = .data[[dict$mod]], |
196 | 3x |
term = term, |
197 | 3x |
na.rm = TRUE |
198 |
) |
|
199 |
) |
|
200 | ||
201 | 3x |
uncer <- list() |
202 | 3x |
if ("fixed" %in% names(data_splits)) { |
203 | 3x |
uncer <- |
204 | 3x |
append( |
205 | 3x |
uncer, |
206 | 3x |
list(dplyr::mutate( |
207 | 3x |
data_splits$fixed, |
208 | 3x |
uncer = scalar_uncer( |
209 | 3x |
.data[[dict$obs]], |
210 | 3x |
term = term, |
211 | 3x |
params = params_fixed |
212 |
) |
|
213 |
)) |
|
214 |
) |
|
215 |
} |
|
216 | 3x |
if ("indicative" %in% names(data_splits)) { |
217 | 2x |
uncer <- |
218 | 2x |
append( |
219 | 2x |
uncer, |
220 | 2x |
list( |
221 | 2x |
dplyr::mutate( |
222 | 2x |
data_splits$indicative, |
223 | 2x |
uncer = scalar_uncer( |
224 | 2x |
.data[[dict$obs]], |
225 | 2x |
term = term, |
226 | 2x |
params = params_indicative |
227 |
) |
|
228 |
) |
|
229 |
) |
|
230 |
) |
|
231 |
} |
|
232 | 3x |
rmsu <- |
233 | 3x |
uncer |> |
234 | 3x |
dplyr::bind_rows() |> |
235 | 3x |
dplyr::summarise(rmsu = vec_rmsu_uncer(uncer, na.rm = TRUE)) |
236 | ||
237 | 3x |
group_stats$rmsu <- rmsu$rmsu |
238 | 3x |
group_stats$mqi_90 <- mqo_percentile(site_stats$mqi, na.rm = TRUE) |
239 | 3x |
group_stats$mqo <- group_stats$mqi_90 <= 1 |
240 |
} |
|
241 | ||
242 | 6x |
site_stats[[dict$site]] <- factor( |
243 | 6x |
site_stats[[dict$site]], |
244 | 6x |
unique(data[[dict$site]]) |
245 |
) |
|
246 | 6x |
site_stats <- dplyr::arrange(site_stats, .data[[dict$site]]) |
247 | ||
248 | 6x |
out <- list(summary = group_stats) |
249 | 6x |
if (term == "long") { |
250 | 3x |
out <- append(out, list(by_type = type_stats)) |
251 |
} |
|
252 | 6x |
out <- append(out, list(by_site = site_stats)) |
253 | ||
254 |
# rename for consistency |
|
255 | 6x |
for (i in seq_along(out)) { |
256 | 15x |
for (j in seq_along(dict)) { |
257 | 90x |
names(out[[i]])[names(out[[i]]) == dict[[j]]] <- names(dict)[[j]] |
258 |
} |
|
259 |
} |
|
260 | ||
261 |
# append input info |
|
262 | 6x |
out <- append( |
263 | 6x |
out, |
264 | 6x |
list( |
265 | 6x |
inputs = list( |
266 | 6x |
term = term, |
267 | 6x |
pollutant = pollutant, |
268 | 6x |
params_fixed = enframe_params(params_fixed), |
269 | 6x |
params_indicative = enframe_params(params_indicative) |
270 |
) |
|
271 |
) |
|
272 |
) |
|
273 | ||
274 |
# return specific table? |
|
275 | 6x |
if (!is.null(table)) { |
276 | ! |
rlang::arg_match(table, names(out), multiple = FALSE) |
277 | ! |
out <- out[[table]] |
278 |
} else { |
|
279 | 6x |
class(out) <- "mqor_stats" |
280 |
} |
|
281 | ||
282 | 6x |
return(out) |
283 |
} |
|
284 | ||
285 |
#' @noRd |
|
286 |
enframe_params <- function(params) { |
|
287 | 12x |
tbl <- dplyr::as_tibble(params) |
288 | 12x |
if (nrow(tbl) == 0L) { |
289 | ! |
tbl <- dplyr::tibble( |
290 | ! |
rv = NA_real_, |
291 | ! |
u_rv = NA_real_, |
292 | ! |
a = NA_real_, |
293 | ! |
b = NA_real_ |
294 |
) |
|
295 |
} |
|
296 | 12x |
return(tbl) |
297 |
} |
|
298 | ||
299 |
#' Helper to take data splits from parent function, apply different parmas to |
|
300 |
#' each, bind and return. |
|
301 |
#' @noRd |
|
302 |
calculate_split_stats <- function( |
|
303 |
fun, |
|
304 |
data, |
|
305 |
data_splits, |
|
306 |
term, |
|
307 |
params_fixed, |
|
308 |
params_indicative, |
|
309 |
obs, |
|
310 |
mod, |
|
311 |
site, |
|
312 |
type |
|
313 |
) { |
|
314 | 9x |
site_stats <- list() |
315 | ||
316 | 9x |
if ("fixed" %in% names(data_splits)) { |
317 | 9x |
if (is.null(params_fixed)) { |
318 | ! |
cli::cli_abort( |
319 | ! |
"'fixed' data has been identified; please provide parameters via {.field params_fixed}." |
320 |
) |
|
321 |
} |
|
322 | ||
323 | 9x |
site_stats <- |
324 | 9x |
append( |
325 | 9x |
site_stats, |
326 | 9x |
list( |
327 | 9x |
fun( |
328 | 9x |
data = data_splits$fixed, |
329 | 9x |
params = params_fixed, |
330 | 9x |
term = term, |
331 | 9x |
obs = obs, |
332 | 9x |
mod = mod, |
333 | 9x |
site = site, |
334 | 9x |
type = type |
335 |
) |
|
336 |
) |
|
337 |
) |
|
338 |
} |
|
339 | ||
340 | 9x |
if ("indicative" %in% names(data_splits)) { |
341 | 6x |
if (is.null(params_indicative)) { |
342 | ! |
cli::cli_abort( |
343 | ! |
"'indicative' data has been identified; please provide parameters via {.field params_indicative}." |
344 |
) |
|
345 |
} |
|
346 | ||
347 | 6x |
site_stats <- |
348 | 6x |
append( |
349 | 6x |
site_stats, |
350 | 6x |
list( |
351 | 6x |
fun( |
352 | 6x |
data = data_splits$indicative, |
353 | 6x |
params = params_indicative, |
354 | 6x |
term = term, |
355 | 6x |
obs = obs, |
356 | 6x |
mod = mod, |
357 | 6x |
site = site, |
358 | 6x |
type = type |
359 |
) |
|
360 |
) |
|
361 |
) |
|
362 |
} |
|
363 | ||
364 | 9x |
site_stats <- dplyr::bind_rows(site_stats) |
365 | ||
366 | 9x |
return(site_stats) |
367 |
} |
|
368 | ||
369 |
#' Helper for short-term site stats |
|
370 |
#' @noRd |
|
371 |
calc_shortterm_site_stats <- function( |
|
372 |
data, |
|
373 |
term, |
|
374 |
params, |
|
375 |
obs, |
|
376 |
mod, |
|
377 |
site, |
|
378 |
type |
|
379 |
) { |
|
380 | 5x |
dplyr::summarise( |
381 | 5x |
data, |
382 | 5x |
mean_obs = mean(.data[[obs]], na.rm = TRUE), |
383 | 5x |
mean_mod = mean(.data[[mod]], na.rm = TRUE), |
384 | 5x |
bias = vec_bias( |
385 | 5x |
obs = .data[[obs]], |
386 | 5x |
mod = .data[[mod]], |
387 | 5x |
na.rm = TRUE |
388 |
), |
|
389 | 5x |
rmse = vec_rmse( |
390 | 5x |
obs = .data[[obs]], |
391 | 5x |
mod = .data[[mod]], |
392 | 5x |
na.rm = TRUE |
393 |
), |
|
394 | 5x |
rmsu = vec_rmsu( |
395 | 5x |
obs = .data[[obs]], |
396 | 5x |
term = term, |
397 | 5x |
params = params, |
398 | 5x |
na.rm = TRUE |
399 |
), |
|
400 | 5x |
r = vec_cor( |
401 | 5x |
obs = .data[[obs]], |
402 | 5x |
mod = .data[[mod]], |
403 | 5x |
na.rm = TRUE |
404 |
), |
|
405 | 5x |
sd_obs = vec_sd(x = .data[[obs]], term = term, na.rm = TRUE), |
406 | 5x |
sd_mod = vec_sd(x = .data[[mod]], term = term, na.rm = TRUE), |
407 | 5x |
crmse = vec_crmse( |
408 | 5x |
obs = .data[[obs]], |
409 | 5x |
mod = .data[[mod]], |
410 | 5x |
na.rm = TRUE |
411 |
), |
|
412 | 5x |
mqi = vec_mqi2( |
413 | 5x |
rmsu = .data$rmsu, |
414 | 5x |
err = abs(.data$rmse), |
415 | 5x |
b = params$b |
416 |
), |
|
417 | 5x |
ti_bias = vec_pi_bias2( |
418 | 5x |
bias = .data$bias, |
419 | 5x |
rmsu = .data$rmsu, |
420 | 5x |
b = params$b |
421 |
), |
|
422 | 5x |
ti_cor = vec_pi_cor2( |
423 | 5x |
cor = .data$r, |
424 | 5x |
sd_o = .data$sd_obs, |
425 | 5x |
sd_m = .data$sd_mod, |
426 | 5x |
rmsu = .data$rmsu, |
427 | 5x |
b = params$b |
428 |
), |
|
429 | 5x |
ti_sd = vec_pi_sd2( |
430 | 5x |
sd_o = .data$sd_obs, |
431 | 5x |
sd_m = .data$sd_mod, |
432 | 5x |
rmsu = .data$rmsu, |
433 | 5x |
b = params$b |
434 |
), |
|
435 | 5x |
ti_crmse = vec_ti_crmse2( |
436 | 5x |
mqi = .data$mqi, |
437 | 5x |
tib = .data$ti_bias |
438 |
), |
|
439 | 5x |
.by = dplyr::all_of(c(site, type)) |
440 |
) |> |
|
441 | 5x |
dplyr::mutate( |
442 | 5x |
rmsu_star = .data$rmsu * sqrt(1 + params$b^2), |
443 | 5x |
.after = "rmsu" |
444 |
) |
|
445 |
} |
|
446 | ||
447 |
#' @noRd |
|
448 |
calc_longterm_site_stats <- function( |
|
449 |
data, |
|
450 |
term, |
|
451 |
params, |
|
452 |
obs, |
|
453 |
mod, |
|
454 |
site, |
|
455 |
type |
|
456 |
) { |
|
457 | 5x |
dplyr::summarise( |
458 | 5x |
data, |
459 | 5x |
obs = .data[[obs]], |
460 | 5x |
mod = .data[[mod]], |
461 | 5x |
uncer = scalar_uncer( |
462 | 5x |
obs = .data[[obs]], |
463 | 5x |
term = term, |
464 | 5x |
params = params, |
465 | 5x |
na.rm = TRUE |
466 |
), |
|
467 | 5x |
ar_low = .data[[obs]] - sqrt(1 + params$b^2) * .data$uncer, |
468 | 5x |
ar_up = .data[[obs]] + sqrt(1 + params$b^2) * .data$uncer, |
469 | 5x |
mqi = vec_mqi( |
470 | 5x |
obs = .data[[obs]], |
471 | 5x |
mod = .data[[mod]], |
472 | 5x |
term = term, |
473 | 5x |
params = params, |
474 | 5x |
na.rm = TRUE |
475 |
), |
|
476 | 5x |
.by = dplyr::all_of(c(site, type)) |
477 |
) |
|
478 |
} |
|
479 | ||
480 |
#' @noRd |
|
481 |
calc_longterm_type_stats <- function( |
|
482 |
data, |
|
483 |
term, |
|
484 |
params, |
|
485 |
obs, |
|
486 |
mod, |
|
487 |
site, |
|
488 |
type |
|
489 |
) { |
|
490 | 5x |
dplyr::summarise( |
491 | 5x |
data, |
492 | 5x |
si_rmse = vec_si_rmse( |
493 | 5x |
obs = .data[[obs]], |
494 | 5x |
mod = .data[[mod]], |
495 | 5x |
term = term, |
496 | 5x |
params = params, |
497 | 5x |
na.rm = TRUE |
498 |
), |
|
499 | 5x |
si_bias = vec_pi_bias( |
500 | 5x |
obs = .data[[obs]], |
501 | 5x |
mod = .data[[mod]], |
502 | 5x |
term = term, |
503 | 5x |
params = params, |
504 | 5x |
na.rm = TRUE |
505 |
), |
|
506 | 5x |
si_cor = vec_pi_cor( |
507 | 5x |
obs = .data[[obs]], |
508 | 5x |
mod = .data[[mod]], |
509 | 5x |
term = term, |
510 | 5x |
params = params, |
511 | 5x |
na.rm = TRUE |
512 |
), |
|
513 | 5x |
si_sd = vec_pi_sd( |
514 | 5x |
obs = .data[[obs]], |
515 | 5x |
mod = .data[[mod]], |
516 | 5x |
term = term, |
517 | 5x |
params = params, |
518 | 5x |
na.rm = TRUE |
519 |
), |
|
520 | 5x |
.by = dplyr::all_of(type) |
521 |
) |
|
522 |
} |
1 |
#' Statistical performance indicators |
|
2 |
#' |
|
3 |
#' These functions are provided for convenient calculation of modelling the |
|
4 |
#' listed model quality objectives performance indicators. All functions work on |
|
5 |
#' numeric R vectors, containing modelled or measured concentrations at a single |
|
6 |
#' sampling point, or averaged modelled or measured concentrations across |
|
7 |
#' multiple sampling points. |
|
8 |
#' - Bias ([vec_bias()]) |
|
9 |
#' - Root Mean Square Error ([vec_rmse()]) |
|
10 |
#' - Maximum Accepted Measurement Uncertainty ([scalar_uncer()]) |
|
11 |
#' - Root Mean Square Uncertainty ([vec_rmsu()]) |
|
12 |
#' - Pearson's Correlation coefficient ([vec_cor()]) |
|
13 |
#' - Standard Deviation ([vec_sd()]) |
|
14 |
#' - Centred Root Mean Square Error ([vec_crmse()]) |
|
15 |
#' - MQI ([vec_mqi()]) |
|
16 |
#' |
|
17 |
#' @param obs,mod,x A numeric vector of observed (measured) concentrations |
|
18 |
#' (`obs`), modelled concentrations (`mod`), or either of the previous |
|
19 |
#' depending on desired output (`x`). |
|
20 |
#' @param term Either `"short"` or `"long"`, identifying whether `data` |
|
21 |
#' represents Short- or Long-term data. |
|
22 |
#' @param params A set of `mqor` parameters, most simply constructed using |
|
23 |
#' [mqo_params()]. See [mqo_params()] for more information about how |
|
24 |
#' parameter sets can be constructed. |
|
25 |
#' @inheritParams base::mean |
|
26 |
#' |
|
27 |
#' @family model performance indicators |
|
28 |
#' |
|
29 |
#' @returns All `vec_` functions return a single numeric value. `scalar_` |
|
30 |
#' functions return a numeric vector the same length as `obs` or `mod`. |
|
31 |
#' |
|
32 |
#' @rdname statistical-performance-indicators |
|
33 |
#' @order 1 |
|
34 |
#' @export |
|
35 |
vec_bias <- function(obs, mod, na.rm = FALSE) { |
|
36 | 100x |
validate_equal(obs, mod) |
37 | 100x |
mean(mod, na.rm = na.rm) - mean(obs, na.rm = na.rm) |
38 |
} |
|
39 | ||
40 |
#' @rdname statistical-performance-indicators |
|
41 |
#' @order 2 |
|
42 |
#' @export |
|
43 |
vec_rmse <- function(obs, mod, na.rm = FALSE) { |
|
44 | 55x |
validate_equal(obs, mod) |
45 | 55x |
summed <- sum((obs - mod)^2, na.rm = na.rm) |
46 | 55x |
len <- length(obs) |
47 | 55x |
sqrt(summed / len) |
48 |
} |
|
49 | ||
50 |
#' @rdname statistical-performance-indicators |
|
51 |
#' @order 3 |
|
52 |
#' @export |
|
53 |
scalar_uncer <- function(obs, term, params = mqo_params(), na.rm = FALSE) { |
|
54 | 322x |
term <- validate_term(term) |
55 | 322x |
params$u_rv * sqrt((1 - params$a^2) * obs^2 + (params$a^2 * params$rv^2)) |
56 |
} |
|
57 | ||
58 |
#' @rdname statistical-performance-indicators |
|
59 |
#' @order 4 |
|
60 |
#' @export |
|
61 |
vec_rmsu <- function(obs, term, params = mqo_params(), na.rm = TRUE) { |
|
62 | 272x |
term <- validate_term(term) |
63 | 272x |
uncer <- scalar_uncer(obs = obs, term = term, params = params, na.rm = na.rm) |
64 | 272x |
vec_rmsu_uncer(uncer, na.rm) |
65 |
} |
|
66 | ||
67 |
#' Non-exported version of vec_rmsu that takes an uncertainty vector |
|
68 |
#' Needed for mixed type networks |
|
69 |
#' @noRd |
|
70 |
vec_rmsu_uncer <- function(uncer, na.rm) { |
|
71 | 275x |
len <- length(uncer) |
72 | 275x |
sqrt(sum(uncer^2, na.rm = na.rm) / len) |
73 |
} |
|
74 | ||
75 |
#' @rdname statistical-performance-indicators |
|
76 |
#' @order 5 |
|
77 |
#' @export |
|
78 |
vec_cor <- function(obs, mod, na.rm = FALSE) { |
|
79 | 55x |
validate_equal(obs, mod) |
80 | ||
81 | 55x |
m_diff <- mod - mean(mod, na.rm = na.rm) |
82 | 55x |
o_diff <- obs - mean(obs, na.rm = na.rm) |
83 | 55x |
top <- sum(m_diff * o_diff, na.rm = na.rm) |
84 | ||
85 | 55x |
m_bottom <- sqrt(sum((mod - mean(mod, na.rm = na.rm))^2, na.rm = na.rm)) |
86 | 55x |
o_bottom <- sqrt(sum((obs - mean(obs, na.rm = na.rm))^2, na.rm = na.rm)) |
87 | ||
88 | 55x |
top / (m_bottom * o_bottom) |
89 |
} |
|
90 | ||
91 |
#' @rdname statistical-performance-indicators |
|
92 |
#' @order 6 |
|
93 |
#' @export |
|
94 |
vec_sd <- function(x, term, na.rm = FALSE) { |
|
95 | 120x |
term <- validate_term(term) |
96 | 120x |
summed <- sum((x - mean(x, na.rm = na.rm))^2, na.rm = na.rm) |
97 | 120x |
len <- length(x) |
98 | 120x |
if (term == "long") { |
99 | 26x |
len <- len - 1L |
100 |
} |
|
101 | 120x |
sqrt(summed / len) |
102 |
} |
|
103 | ||
104 |
#' @rdname statistical-performance-indicators |
|
105 |
#' @order 7 |
|
106 |
#' @export |
|
107 |
vec_crmse <- function(obs, mod, na.rm = FALSE) { |
|
108 | 47x |
validate_equal(obs, mod) |
109 | 47x |
m_diff <- mod - mean(mod, na.rm = na.rm) |
110 | 47x |
o_diff <- obs - mean(obs, na.rm = na.rm) |
111 | 47x |
diffs <- m_diff - o_diff |
112 | 47x |
diffs_summed <- sum(diffs^2, na.rm = na.rm) |
113 | 47x |
len <- length(obs) |
114 | 47x |
sqrt(diffs_summed / len) |
115 |
} |
|
116 | ||
117 |
#' @rdname statistical-performance-indicators |
|
118 |
#' @order 8 |
|
119 |
#' @export |
|
120 |
vec_mqi <- function(obs, mod, term, params = mqo_params(), na.rm = FALSE) { |
|
121 | 45x |
validate_equal(obs, mod) |
122 | 45x |
term <- validate_term(term) |
123 | ||
124 | 45x |
rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm) |
125 | ||
126 | 45x |
if (term == "short") { |
127 | ! |
top <- vec_rmse(obs = obs, mod = mod, na.rm = na.rm) |
128 |
} else { |
|
129 | 45x |
top <- abs(vec_bias( |
130 | 45x |
obs = obs, |
131 | 45x |
mod = mod, |
132 | 45x |
na.rm = na.rm |
133 |
)) |
|
134 |
} |
|
135 | ||
136 | 45x |
fct <- sqrt(1 + params$b^2) |
137 | 45x |
top / (fct * rmsu) |
138 |
} |
|
139 | ||
140 |
#' Calculate the MQI from existing RMSU & error |
|
141 |
#' @noRd |
|
142 |
vec_mqi2 <- function(rmsu, err, b) { |
|
143 | 47x |
fct <- sqrt(1 + b^2) |
144 | 47x |
err / (fct * rmsu) |
145 |
} |
|
146 | ||
147 |
#' Complementary performance indicators |
|
148 |
#' |
|
149 |
#' These functions complement the MQI/MQO procedure outlined using [vec_mqi()], |
|
150 |
#' and are used in internally by [plot_mqi_scatter()]. |
|
151 |
#' |
|
152 |
#' The following indicators are available for both long-term and short-term |
|
153 |
#' data: |
|
154 |
#' - Bias performance indicator ([vec_pi_bias()]) |
|
155 |
#' - Correlation performance indicator ([vec_pi_cor()]) |
|
156 |
#' - Standard deviation performance indicator ([vec_pi_sd()]) |
|
157 |
#' |
|
158 |
#' The following 'temporal' (short-term) indicators are available: |
|
159 |
#' - Centred Root Mean Square Error temporal indicator ([vec_ti_crmse()]) |
|
160 |
#' |
|
161 |
#' The following 'spatial' (long-term) indicators are available: |
|
162 |
#' - Root Mean Square Error spatial indicator ([vec_si_rmse()]) |
|
163 |
#' |
|
164 |
#' @param obs,mod A numeric vector of observed (measured) concentrations (`obs`) |
|
165 |
#' or modelled concentrations (`mod`). |
|
166 |
#' |
|
167 |
#' @inheritParams vec_bias |
|
168 |
#' |
|
169 |
#' @family model performance indicators |
|
170 |
#' |
|
171 |
#' @rdname complementary-performance-indicators |
|
172 |
#' @order 1 |
|
173 |
#' @export |
|
174 |
vec_pi_bias <- function(obs, mod, term, params, na.rm = FALSE) { |
|
175 | 5x |
validate_equal(obs, mod) |
176 | 5x |
term <- validate_term(term) |
177 | ||
178 | 5x |
bias <- vec_bias(obs = obs, mod = mod, na.rm = na.rm) |
179 | ||
180 | 5x |
rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm) |
181 | ||
182 | 5x |
beta <- sqrt(1 + params$b^2) |
183 | ||
184 | 5x |
bias / (beta * rmsu) |
185 |
} |
|
186 | ||
187 |
#' @noRd |
|
188 |
vec_pi_bias2 <- function(bias, rmsu, b) { |
|
189 | 47x |
bias / (sqrt(1 + b^2) * rmsu) |
190 |
} |
|
191 | ||
192 |
#' @rdname complementary-performance-indicators |
|
193 |
#' @order 2 |
|
194 |
#' @export |
|
195 |
vec_pi_cor <- function(obs, mod, term, params, na.rm = FALSE) { |
|
196 | 5x |
validate_equal(obs, mod) |
197 | 5x |
term <- validate_term(term) |
198 | 5x |
R <- vec_cor(obs, mod, na.rm = na.rm) |
199 | 5x |
sd_o <- vec_sd(obs, term, na.rm = na.rm) |
200 | 5x |
sd_m <- vec_sd(mod, term, na.rm = na.rm) |
201 | 5x |
rmsu2 <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm)^2 |
202 | 5x |
beta <- 1 + params$b^2 |
203 | 5x |
(1 - R) * ((2 * sd_o * sd_m) / (beta * rmsu2)) |
204 |
} |
|
205 | ||
206 |
#' @noRd |
|
207 |
vec_pi_cor2 <- function(cor, sd_o, sd_m, rmsu, b) { |
|
208 | 47x |
beta <- 1 + b^2 |
209 | 47x |
rmsu2 <- rmsu^2 |
210 | 47x |
(1 - cor) * ((2 * sd_o * sd_m) / (beta * rmsu2)) |
211 |
} |
|
212 | ||
213 |
#' @rdname complementary-performance-indicators |
|
214 |
#' @order 3 |
|
215 |
#' @export |
|
216 |
vec_pi_sd <- function(obs, mod, term, params, na.rm = FALSE) { |
|
217 | 5x |
validate_equal(obs, mod) |
218 | 5x |
term <- validate_term(term) |
219 | 5x |
sd_o <- vec_sd(obs, term, na.rm = na.rm) |
220 | 5x |
sd_m <- vec_sd(mod, term, na.rm = na.rm) |
221 | 5x |
rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm) |
222 | 5x |
beta <- sqrt(1 + params$b^2) |
223 | 5x |
(sd_m - sd_o) / (beta * rmsu) |
224 |
} |
|
225 | ||
226 |
#' @noRd |
|
227 |
vec_pi_sd2 <- function(sd_o, sd_m, rmsu, b) { |
|
228 | 47x |
beta <- sqrt(1 + b^2) |
229 | 47x |
(sd_m - sd_o) / (beta * rmsu) |
230 |
} |
|
231 | ||
232 |
#' @rdname complementary-performance-indicators |
|
233 |
#' @order 4 |
|
234 |
#' @export |
|
235 |
vec_ti_crmse <- function(obs, mod, term, params, na.rm = FALSE) { |
|
236 | ! |
validate_equal(obs, mod) |
237 | ! |
term <- validate_term(term) |
238 | ! |
if (term == "long") { |
239 | ! |
cli::cli_abort( |
240 | ! |
"This performance indicator is not supported for long-term data." |
241 |
) |
|
242 |
} |
|
243 | ||
244 | ! |
mqi <- vec_mqi( |
245 | ! |
obs = obs, |
246 | ! |
mod = mod, |
247 | ! |
term = term, |
248 | ! |
params = params, |
249 | ! |
na.rm = na.rm |
250 |
) |
|
251 | ||
252 | ! |
tib <- vec_pi_bias( |
253 | ! |
obs = obs, |
254 | ! |
mod = mod, |
255 | ! |
term = term, |
256 | ! |
params = params, |
257 | ! |
na.rm = na.rm |
258 |
) |
|
259 | ||
260 | ! |
sqrt(mqi^2 - tib^2) |
261 |
} |
|
262 | ||
263 |
#' @noRd |
|
264 |
vec_ti_crmse2 <- function(mqi, tib) { |
|
265 | 47x |
sqrt(mqi^2 - tib^2) |
266 |
} |
|
267 | ||
268 |
#' @rdname complementary-performance-indicators |
|
269 |
#' @order 5 |
|
270 |
#' @export |
|
271 |
vec_si_rmse <- function(obs, mod, term, params, na.rm = FALSE) { |
|
272 | 5x |
validate_equal(obs, mod) |
273 | 5x |
term <- validate_term(term) |
274 | ||
275 | 5x |
if (term == "short") { |
276 | ! |
cli::cli_abort( |
277 | ! |
"This performance indicator is not supported for short-term data." |
278 |
) |
|
279 |
} |
|
280 | ||
281 | 5x |
rmse <- vec_rmse(obs = obs, mod = mod, na.rm = na.rm) |
282 | ||
283 | 5x |
rmsu <- vec_rmsu(obs = obs, term = term, params = params, na.rm = na.rm) |
284 | ||
285 | 5x |
beta <- sqrt(1 + params$b^2) |
286 | ||
287 | 5x |
rmse / (beta * rmsu) |
288 |
} |
|
289 | ||
290 |
#' Function to ensure vectors are of equal length |
|
291 |
#' @noRd |
|
292 |
validate_equal <- function(obs, mod) { |
|
293 | 323x |
if (length(mod) != length(obs)) { |
294 | 1x |
cli::cli_abort( |
295 | 1x |
"The length of observed values, obs, must be equal to the length of the modelled values, mod." |
296 |
) |
|
297 |
} |
|
298 |
} |
|
299 | ||
300 |
#' Check term is appropriate |
|
301 |
#' @noRd |
|
302 |
validate_term <- function(x) { |
|
303 | 781x |
rlang::arg_match(x, c("short", "long")) |
304 |
} |
|
305 | ||
306 |
#' Check 'type' col only has either 'fixed' or 'indicative' |
|
307 |
#' @noRd |
|
308 |
validate_type <- function(x) { |
|
309 | 1x |
if (any(!x %in% c("fixed", "indicative"))) { |
310 | 1x |
cli::cli_abort( |
311 | 1x |
"The {.field type} column should only contain either 'fixed' or 'indicative'." |
312 |
) |
|
313 |
} |
|
314 |
} |
1 |
#' Plot a scatter chart visualising the Modelling Quality Indicator (MQI) |
|
2 |
#' |
|
3 |
#' This function produces a plot comparing modelled and measured concentrations. |
|
4 |
#' For long-term data, a traditional scatter plot of observed vs modelled |
|
5 |
#' concentrations is plotted with a shaded area showing the acceptability range |
|
6 |
#' and dashed boundaries showing the maximum accepted measurement uncertainty. |
|
7 |
#' For short-term data, a 'bullseye' scatter plot is plotted showing two |
|
8 |
#' complementary performance indicators with a shaded area showing the MQO |
|
9 |
#' fulfilment area and a dashed boundary showing the maximum accepted |
|
10 |
#' measurement uncertainty. |
|
11 |
#' |
|
12 |
#' @inheritParams plot_comparison_bars |
|
13 |
#' |
|
14 |
#' @param stats_shortterm,stats_longterm The output of [summarise_mqo_stats()]. |
|
15 |
#' All relevant information (e.g., `term`, `params_fixed`, etc.) will be |
|
16 |
#' passed to this function. `stats_shortterm` should contain short-term |
|
17 |
#' (temporal) statistics, `stats_longterm` should contain long-term (spatial) |
|
18 |
#' statistics. Either of these can be `NULL`, but at least one should be |
|
19 |
#' provided. |
|
20 |
#' |
|
21 |
#' @param show_dashed Should extra dashed lines be shown on the figures? These |
|
22 |
#' show the maximum accepted measurement uncertainty, but can overcrowd the |
|
23 |
#' plotting area. |
|
24 |
#' |
|
25 |
#' @param color_fixed,color_indicative,color_outline The colours to use for |
|
26 |
#' fixed data, indicative data, and any another annotations. Can be expressed |
|
27 |
#' as hex codes, or any colours listed in [colors()]. |
|
28 |
#' |
|
29 |
#' @param gp A [grid::gpar()] object passed to [grid::textGrob()], used to |
|
30 |
#' control the appearance of the parameter & indicator annotations. `fontsize` |
|
31 |
#' is likely the most useful argument, but many options are available through |
|
32 |
#' [grid::gpar()]. Only applies when `interactive = FALSE`. |
|
33 |
#' |
|
34 |
#' @family plotting functions |
|
35 |
#' |
|
36 |
#' @author Jack Davison |
|
37 |
#' |
|
38 |
#' @examples |
|
39 |
#' long <- summarise_mqo_stats(demo_longterm, pollutant = "PM10") |
|
40 |
#' |
|
41 |
#' short <- summarise_mqo_stats(demo_shortterm, pollutant = "PM10") |
|
42 |
#' |
|
43 |
#' plot_mqi_scatter(stats_shortterm = short) |
|
44 |
#' |
|
45 |
#' plot_mqi_scatter(stats_shortterm = short, stats_longterm = long) |
|
46 |
#' |
|
47 |
#' plot_mqi_scatter(stats_longterm = long) |
|
48 |
#' |
|
49 |
#' @export |
|
50 |
plot_mqi_scatter <- function( |
|
51 |
stats_shortterm = NULL, |
|
52 |
stats_longterm = NULL, |
|
53 |
show_dashed = FALSE, |
|
54 |
color_fixed = "#4269D0", |
|
55 |
color_indicative = "#EFB118", |
|
56 |
color_outline = "black", |
|
57 |
interactive = FALSE, |
|
58 |
gp = grid::gpar(fontsize = 10) |
|
59 |
) { |
|
60 | 6x |
validate_mqor_stats(stats_shortterm) |
61 | 6x |
validate_mqor_stats(stats_longterm) |
62 | 6x |
if (!is.null(stats_shortterm)) { |
63 | 4x |
term <- "short" |
64 | 4x |
params_fixed <- stats_shortterm$inputs$params_fixed |
65 | 4x |
params_indicative <- stats_shortterm$inputs$params_indicative |
66 | ||
67 | 4x |
if (!"fixed" %in% stats_shortterm$by_site$type) { |
68 | ! |
params_fixed <- NULL |
69 |
} |
|
70 | 4x |
if (!"indicative" %in% stats_shortterm$by_site$type) { |
71 | 2x |
params_indicative <- NULL |
72 |
} |
|
73 | ||
74 | 4x |
uncer <- |
75 | 4x |
dplyr::bind_rows( |
76 | 4x |
"fixed" = make_circle(1 / sqrt(1 + params_fixed$b^2)), |
77 | 4x |
"indicative" = make_circle(1 / sqrt(1 + params_indicative$b^2)), |
78 | 4x |
.id = "type" |
79 |
) |
|
80 | ||
81 | 4x |
res <- paste0( |
82 | 4x |
round(stats_shortterm$summary$mqi_90, 2), |
83 |
" (", |
|
84 | 4x |
ifelse(stats_shortterm$summary$mqo, "Pass", "Fail"), |
85 |
")" |
|
86 |
) |
|
87 | ||
88 | 4x |
if (!is.null(stats_longterm)) { |
89 | 2x |
res2 <- paste0( |
90 | 2x |
round(stats_longterm$summary$mqi_90, 2), |
91 |
" (", |
|
92 | 2x |
ifelse(stats_longterm$summary$mqo, "Pass", "Fail"), |
93 |
")" |
|
94 |
) |
|
95 |
} |
|
96 | ||
97 | 4x |
if (interactive) { |
98 | 2x |
if (!is.null(stats_longterm)) { |
99 | 1x |
html <- |
100 | 1x |
glue::glue( |
101 |
" |
|
102 | 1x |
<b>MQI<sub>{term},90th</sub> = {res}</b><br> |
103 | 1x |
<b>MQI<sub>long,90th</sub> = {res2}</b><br> |
104 | 1x |
α<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br> |
105 | 1x |
β<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br> |
106 | 1x |
RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br> |
107 | 1x |
U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br> |
108 |
" |
|
109 |
) |
|
110 |
} else { |
|
111 | 1x |
html <- |
112 | 1x |
glue::glue( |
113 | 1x |
"<b>MQI<sub>{term},90th</sub> = {res}</b><br> |
114 | 1x |
α<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br> |
115 | 1x |
β<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br> |
116 | 1x |
RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br> |
117 | 1x |
U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>" |
118 |
) |
|
119 |
} |
|
120 | ||
121 | 2x |
html2 <- |
122 | 2x |
glue::glue( |
123 |
' |
|
124 | 2x |
TI<sub>B</sub> = {fmt_pi(stats_shortterm$summary, "ti_bias_90")}<br> |
125 | 2x |
TI<sub>R</sub> = {fmt_pi(stats_shortterm$summary, "ti_cor_90")}<br> |
126 | 2x |
TI<sub>SD</sub> = {fmt_pi(stats_shortterm$summary, "ti_sd_90")}<br> |
127 |
' |
|
128 |
) |
|
129 | ||
130 |
# get outline color RGB - for the highlight area |
|
131 | 2x |
color_outline_rgb <- grDevices::col2rgb(col = color_outline) |
132 | ||
133 | 2x |
plt <- |
134 | 2x |
plotly::plot_ly( |
135 | 2x |
colors = c( |
136 | 2x |
"fixed" = color_fixed, |
137 | 2x |
"indicative" = color_indicative |
138 |
) |
|
139 |
) |> |
|
140 | 2x |
plotly::add_ribbons( |
141 | 2x |
data = make_circle(1), |
142 | 2x |
x = ~x, |
143 | 2x |
ymin = ~ymin, |
144 | 2x |
ymax = ~ymax, |
145 | 2x |
color = I(color_outline), |
146 | 2x |
fillcolor = grDevices::rgb( |
147 | 2x |
red = color_outline_rgb[["red", 1]], |
148 | 2x |
green = color_outline_rgb[["green", 1]], |
149 | 2x |
blue = color_outline_rgb[["blue", 1]], |
150 | 2x |
maxColorValue = 255, |
151 | 2x |
alpha = 255 * 0.05 |
152 |
), |
|
153 | 2x |
showlegend = FALSE, |
154 | 2x |
hoverinfo = "skip" |
155 |
) |
|
156 | ||
157 | 2x |
if (show_dashed) { |
158 | ! |
plt <- |
159 | ! |
plotly::add_ribbons( |
160 | ! |
p = plt, |
161 | ! |
data = uncer, |
162 | ! |
x = ~x, |
163 | ! |
ymin = ~ymin, |
164 | ! |
ymax = ~ymax, |
165 | ! |
color = ~type, |
166 | ! |
line = list(dash = "dot"), |
167 | ! |
fillcolor = grDevices::rgb(0, 0, 0, 0), |
168 | ! |
legendgroup = ~type, |
169 | ! |
showlegend = FALSE |
170 |
) |
|
171 |
} |
|
172 | ||
173 | 2x |
plt <- |
174 | 2x |
plt |> |
175 | 2x |
plotly::add_markers( |
176 | 2x |
x = stats_shortterm$by_site$ti_crmse, |
177 | 2x |
y = stats_shortterm$by_site$ti_bias, |
178 | 2x |
color = stats_shortterm$by_site$type, |
179 | 2x |
text = stats_shortterm$by_site$site, |
180 | 2x |
legendgroup = stats_shortterm$by_site$type |
181 |
) |> |
|
182 | 2x |
plotly::layout( |
183 | 2x |
xaxis = list( |
184 | 2x |
range = range(c(-2, 2)), |
185 | 2x |
constrain = "domain", |
186 | 2x |
title = "TI<sub>CRMSE</sub>" |
187 |
), |
|
188 | 2x |
yaxis = list( |
189 | 2x |
range = range(c(-2, 2)), |
190 | 2x |
constrain = "domain", |
191 | 2x |
scaleanchor = "x", |
192 | 2x |
scaleratio = 1, |
193 | 2x |
title = "TI<sub>BIAS</sub>" |
194 |
), |
|
195 | 2x |
legend = list( |
196 | 2x |
orientation = "h", |
197 | 2x |
y = 1.1 |
198 |
) |
|
199 |
) |> |
|
200 | 2x |
plotly::add_annotations( |
201 | 2x |
text = html, |
202 | 2x |
y = 0.8, |
203 | 2x |
x = 0.25, |
204 | 2x |
yref = "y domain", |
205 | 2x |
xref = "x domain", |
206 | 2x |
align = "left", |
207 | 2x |
valign = "top" |
208 |
) |> |
|
209 | 2x |
plotly::add_annotations( |
210 | 2x |
text = html2, |
211 | 2x |
y = 0.1, |
212 | 2x |
x = 0.9, |
213 | 2x |
yref = "y domain", |
214 | 2x |
xref = "x domain", |
215 | 2x |
align = "right", |
216 | 2x |
valign = "bottom" |
217 |
) |
|
218 |
} else { |
|
219 | 2x |
sub <- paste(term, "90th", sep = ",") |
220 | ||
221 | 2x |
grob_mqi <- grid::textGrob(bquote(MQI[.(sub)] == .(res)), gp = gp) |
222 | ||
223 | 2x |
if (!is.null(stats_longterm)) { |
224 | 1x |
sub2 <- paste("long", "90th", sep = ",") |
225 | 1x |
grob_mqi2 <- grid::textGrob(bquote(MQI[.(sub2)] == .(res2)), gp = gp) |
226 |
} else { |
|
227 | 1x |
grob_mqi2 <- grid::textGrob("") |
228 |
} |
|
229 | ||
230 | 2x |
grob_a <- grid::textGrob( |
231 | 2x |
bquote( |
232 | 2x |
alpha[.(term)] == |
233 | 2x |
.(combine( |
234 | 2x |
params_fixed$a, |
235 | 2x |
params_indicative$a |
236 |
)) |
|
237 |
), |
|
238 | 2x |
gp = gp |
239 |
) |
|
240 | 2x |
grob_b <- grid::textGrob( |
241 | 2x |
bquote( |
242 | 2x |
beta[.(term)] == .(combine(params_fixed$b, params_indicative$b)) |
243 |
), |
|
244 | 2x |
gp = gp |
245 |
) |
|
246 | 2x |
grob_rv <- grid::textGrob( |
247 | 2x |
bquote( |
248 | 2x |
RV[.(term)] == .(combine(params_fixed$rv, params_indicative$rv)) |
249 |
), |
|
250 | 2x |
gp = gp |
251 |
) |
|
252 | 2x |
grob_urv <- grid::textGrob( |
253 | 2x |
bquote( |
254 | 2x |
U[Or](RV[.(term)]) == |
255 | 2x |
.(combine(params_fixed$u_rv, params_indicative$u_rv, TRUE)) |
256 |
), |
|
257 | 2x |
gp = gp |
258 |
) |
|
259 | ||
260 | 2x |
grob_tib <- grid::textGrob( |
261 | 2x |
bquote(TI[B] == .(fmt_pi(stats_shortterm$summary, "ti_bias_90"))), |
262 | 2x |
gp = gp |
263 |
) |
|
264 | 2x |
grob_tir <- grid::textGrob( |
265 | 2x |
bquote(TI[R] == .(fmt_pi(stats_shortterm$summary, "ti_cor_90"))), |
266 | 2x |
gp = gp |
267 |
) |
|
268 | 2x |
grob_tis <- grid::textGrob( |
269 | 2x |
bquote(TI[SD] == .(fmt_pi(stats_shortterm$summary, "ti_sd_90"))), |
270 | 2x |
gp = gp |
271 |
) |
|
272 | ||
273 | 2x |
plt <- |
274 | 2x |
ggplot2::ggplot(stats_shortterm$by_site) + |
275 | 2x |
ggplot2::geom_ribbon( |
276 | 2x |
data = make_circle(1), |
277 | 2x |
ggplot2::aes( |
278 | 2x |
x = .data$x, |
279 | 2x |
ymin = .data$ymin, |
280 | 2x |
ymax = .data$ymax |
281 |
), |
|
282 | 2x |
fill = color_outline, |
283 | 2x |
color = color_outline, |
284 | 2x |
alpha = 0.05 |
285 |
) |
|
286 | ||
287 | 2x |
if (show_dashed) { |
288 | ! |
plt <- |
289 | ! |
plt + |
290 | ! |
ggplot2::geom_ribbon( |
291 | ! |
data = uncer, |
292 | ! |
ggplot2::aes( |
293 | ! |
x = .data$x, |
294 | ! |
ymin = .data$ymin, |
295 | ! |
ymax = .data$ymax, |
296 | ! |
color = .data$type |
297 |
), |
|
298 | ! |
fill = NA, |
299 | ! |
lty = 2, |
300 | ! |
show.legend = dplyr::n_distinct(stats_shortterm$by_site$type) > 1L |
301 |
) |
|
302 |
} |
|
303 | ||
304 | 2x |
plt <- plt + |
305 | 2x |
ggplot2::geom_hline(yintercept = 0) + |
306 | 2x |
ggplot2::geom_vline(xintercept = 0) + |
307 | 2x |
ggplot2::geom_point( |
308 | 2x |
ggplot2::aes( |
309 | 2x |
x = .data$ti_crmse, |
310 | 2x |
y = .data$ti_bias, |
311 | 2x |
color = .data$type |
312 |
), |
|
313 | 2x |
show.legend = dplyr::n_distinct(stats_shortterm$by_site$type) > 1L |
314 |
) + |
|
315 | 2x |
ggplot2::coord_equal() + |
316 | 2x |
ggplot2::scale_x_continuous( |
317 | 2x |
expand = ggplot2::expansion(), |
318 | 2x |
limits = c(-2, 2) |
319 |
) + |
|
320 | 2x |
ggplot2::scale_y_continuous( |
321 | 2x |
expand = ggplot2::expansion(), |
322 | 2x |
limits = c(-2, 2) |
323 |
) + |
|
324 | 2x |
ggplot2::theme_bw(14) + |
325 | 2x |
ggplot2::labs( |
326 | 2x |
x = expression(TI[CRMSE]), |
327 | 2x |
y = expression(TI[BIAS]), |
328 | 2x |
color = NULL |
329 |
) + |
|
330 | 2x |
ggplot2::theme( |
331 | 2x |
legend.justification = "left", |
332 | 2x |
legend.spacing = ggplot2::unit(0.2, "cm"), |
333 | 2x |
legend.margin = ggplot2::margin(0, 0, 0, 0, unit = "cm"), |
334 | 2x |
legend.title = ggplot2::element_text(face = "bold") |
335 |
) + |
|
336 | 2x |
ggplot2::scale_color_manual( |
337 | 2x |
values = c( |
338 | 2x |
"fixed" = color_fixed, |
339 | 2x |
"indicative" = color_indicative |
340 |
), |
|
341 | 2x |
aesthetics = c("fill", "colour") |
342 |
) + |
|
343 | 2x |
ggplot2::guides( |
344 | 2x |
fill = ggplot2::guide_legend(position = "top"), |
345 | 2x |
custom_mqi = ggplot2::guide_custom(grob_mqi, order = 1), |
346 | 2x |
custom_mqi2 = ggplot2::guide_custom(grob_mqi2, order = 2), |
347 | 2x |
custom_a = ggplot2::guide_custom( |
348 | 2x |
grob_a, |
349 | 2x |
order = 3, |
350 | 2x |
title = "\nUncertainty\nParameters" |
351 |
), |
|
352 | 2x |
custom_b = ggplot2::guide_custom(grob_b, order = 4), |
353 | 2x |
custom_rv = ggplot2::guide_custom(grob_rv, order = 5), |
354 | 2x |
custom_urv = ggplot2::guide_custom(grob_urv, order = 6), |
355 | 2x |
custom_tib = ggplot2::guide_custom( |
356 | 2x |
grob_tib, |
357 | 2x |
order = 7, |
358 | 2x |
title = "\nComplementary\nIndicators" |
359 |
), |
|
360 | 2x |
custom_tir = ggplot2::guide_custom(grob_tir, order = 8), |
361 | 2x |
custom_tis = ggplot2::guide_custom(grob_tis, order = 9) |
362 |
) |
|
363 |
} |
|
364 | 2x |
} else if (!is.null(stats_longterm)) { |
365 | 2x |
term <- "long" |
366 | 2x |
params_fixed <- stats_longterm$inputs$params_fixed |
367 | 2x |
params_indicative <- stats_longterm$inputs$params_indicative |
368 | ||
369 | 2x |
max_om <- max( |
370 | 2x |
c(stats_longterm$by_site$obs, stats_longterm$by_site$mod), |
371 | 2x |
na.rm = TRUE |
372 |
) |
|
373 | ||
374 | 2x |
build_uncer_bands <- function(params) { |
375 | 4x |
if (is.null(params)) { |
376 | 2x |
return(data.frame( |
377 | 2x |
obs = numeric(), |
378 | 2x |
u = numeric(), |
379 | 2x |
ar = numeric() |
380 |
)) |
|
381 |
} |
|
382 | 2x |
range_o <- pretty(0:max_om) |
383 | 2x |
diff_o <- diff(range_o[1:2]) |
384 | ||
385 | 2x |
uncer <- |
386 | 2x |
data.frame(obs = pretty(n = 100, (0 - diff_o):(max_om + diff_o))) |> |
387 | 2x |
dplyr::rowwise() |> |
388 | 2x |
dplyr::mutate( |
389 | 2x |
u = vec_rmsu( |
390 | 2x |
.data$obs, |
391 | 2x |
term = term, |
392 | 2x |
params = params, |
393 | 2x |
na.rm = TRUE |
394 |
), |
|
395 | 2x |
ar = sqrt(1 + params$b^2) * .data$u |
396 |
) |> |
|
397 | 2x |
dplyr::ungroup() |
398 | ||
399 | 2x |
return(uncer) |
400 |
} |
|
401 | ||
402 | 2x |
if (!"fixed" %in% stats_longterm$by_site$type) { |
403 | ! |
params_fixed <- NULL |
404 |
} |
|
405 | 2x |
if (!"indicative" %in% stats_longterm$by_site$type) { |
406 | 2x |
params_indicative <- NULL |
407 |
} |
|
408 | ||
409 | 2x |
uncer <- |
410 | 2x |
dplyr::bind_rows( |
411 | 2x |
"fixed" = build_uncer_bands(params_fixed), |
412 | 2x |
"indicative" = build_uncer_bands(params_indicative), |
413 | 2x |
.id = "type" |
414 |
) |
|
415 | ||
416 | 2x |
res <- paste0( |
417 | 2x |
round(stats_longterm$summary$mqi_90, 2), |
418 |
" (", |
|
419 | 2x |
ifelse(stats_longterm$summary$mqo, "Pass", "Fail"), |
420 |
")" |
|
421 |
) |
|
422 | ||
423 | 2x |
if (interactive) { |
424 | 1x |
html <- |
425 | 1x |
glue::glue( |
426 | 1x |
"<b>MQI<sub>{term},90th</sub> = {res}</b><br> |
427 | 1x |
α<sub>{term}</sub> = {combine(params_fixed$a, params_indicative$a)}<br> |
428 | 1x |
β<sub>{term}</sub> = {combine(params_fixed$b, params_indicative$b)}<br> |
429 | 1x |
RV<sub>{term}</sub> = {combine(params_fixed$rv, params_indicative$rv)}<br> |
430 | 1x |
U<sub>0r</sub>(RV<sub>{term}</sub>) = {combine(params_fixed$rv, params_indicative$rv)}<br>" |
431 |
) |
|
432 | ||
433 | 1x |
html2 <- |
434 | 1x |
glue::glue( |
435 |
' |
|
436 | 1x |
SI<sub>RMSE</sub> = {fmt_pi(stats_longterm$by_type, "si_rmse")}<br> |
437 | 1x |
SI<sub>B</sub> = {fmt_pi(stats_longterm$by_type, "si_bias")}<br> |
438 | 1x |
SI<sub>R</sub> = {fmt_pi(stats_longterm$by_type, "si_cor")}<br> |
439 | 1x |
SI<sub>SD</sub> = {fmt_pi(stats_longterm$by_type, "si_sd")}<br> |
440 |
' |
|
441 |
) |
|
442 | ||
443 | 1x |
plt <- |
444 | 1x |
plotly::plot_ly( |
445 | 1x |
colors = c( |
446 | 1x |
"fixed" = color_fixed, |
447 | 1x |
"indicative" = color_indicative |
448 |
) |
|
449 |
) |> |
|
450 | 1x |
plotly::add_lines( |
451 | 1x |
x = ceiling(c(0, max_om)), |
452 | 1x |
y = ceiling(c(0, max_om)), |
453 | 1x |
color = I(color_outline), |
454 | 1x |
hoverinfo = "skip", |
455 | 1x |
showlegend = FALSE |
456 |
) |> |
|
457 | 1x |
plotly::add_ribbons( |
458 | 1x |
data = uncer, |
459 | 1x |
x = ~obs, |
460 | 1x |
ymin = ~ obs - ar, |
461 | 1x |
ymax = ~ obs + ar, |
462 | 1x |
color = ~type, |
463 | 1x |
legendgroup = ~type, |
464 | 1x |
opacity = 0.5 |
465 |
) |
|
466 | ||
467 | 1x |
if (show_dashed) { |
468 | ! |
plt <- |
469 | ! |
plotly::add_ribbons( |
470 | ! |
p = plt, |
471 | ! |
data = uncer, |
472 | ! |
x = ~obs, |
473 | ! |
ymin = ~ obs - u, |
474 | ! |
ymax = ~ obs + u, |
475 | ! |
color = ~type, |
476 | ! |
line = list(dash = "dot"), |
477 | ! |
fillcolor = grDevices::rgb(0, 0, 0, 0), |
478 | ! |
legendgroup = ~type, |
479 | ! |
opacity = 0.5, |
480 | ! |
showlegend = FALSE |
481 |
) |
|
482 |
} |
|
483 | ||
484 | 1x |
plt <- plt |> |
485 | 1x |
plotly::add_markers( |
486 | 1x |
x = stats_longterm$by_site$obs, |
487 | 1x |
y = stats_longterm$by_site$mod, |
488 | 1x |
color = stats_longterm$by_site$type, |
489 | 1x |
text = stats_longterm$by_site$site |
490 |
) |> |
|
491 | 1x |
plotly::layout( |
492 | 1x |
xaxis = list( |
493 | 1x |
range = range(pretty(c(0, max_om))), |
494 | 1x |
constrain = "domain", |
495 | 1x |
title = "observed concentrations (μg m<sup>-3</sup>)" |
496 |
), |
|
497 | 1x |
yaxis = list( |
498 | 1x |
range = range(pretty(c(0, max_om))), |
499 | 1x |
constrain = "domain", |
500 | 1x |
scaleanchor = "x", |
501 | 1x |
scaleratio = 1, |
502 | 1x |
title = "modelled concentrations (μg m<sup>-3</sup>)" |
503 |
), |
|
504 | 1x |
legend = list( |
505 | 1x |
orientation = "h", |
506 | 1x |
y = 1.1 |
507 |
) |
|
508 |
) |> |
|
509 | 1x |
plotly::add_annotations( |
510 | 1x |
text = html, |
511 | 1x |
y = 0.8, |
512 | 1x |
x = 0.25, |
513 | 1x |
yref = "y domain", |
514 | 1x |
xref = "x domain", |
515 | 1x |
align = "left", |
516 | 1x |
valign = "top" |
517 |
) |> |
|
518 | 1x |
plotly::add_annotations( |
519 | 1x |
text = html2, |
520 | 1x |
y = 0.1, |
521 | 1x |
x = 0.9, |
522 | 1x |
yref = "y domain", |
523 | 1x |
xref = "x domain", |
524 | 1x |
align = "right", |
525 | 1x |
valign = "bottom" |
526 |
) |
|
527 |
} else { |
|
528 | 1x |
sub <- paste(term, "90th", sep = ",") |
529 | ||
530 | 1x |
grob_mqi <- grid::textGrob(bquote(MQI[.(sub)] == .(res)), gp = gp) |
531 | ||
532 | 1x |
grob_a <- grid::textGrob( |
533 | 1x |
bquote( |
534 | 1x |
alpha[.(term)] == .(combine(params_fixed$a, params_indicative$a)) |
535 |
), |
|
536 | 1x |
gp = gp |
537 |
) |
|
538 | 1x |
grob_b <- grid::textGrob( |
539 | 1x |
bquote( |
540 | 1x |
beta[.(term)] == .(combine(params_fixed$b, params_indicative$b)) |
541 |
), |
|
542 | 1x |
gp = gp |
543 |
) |
|
544 | 1x |
grob_rv <- grid::textGrob( |
545 | 1x |
bquote( |
546 | 1x |
RV[.(term)] == .(combine(params_fixed$rv, params_indicative$rv)) |
547 |
), |
|
548 | 1x |
gp = gp |
549 |
) |
|
550 | 1x |
grob_urv <- grid::textGrob( |
551 | 1x |
bquote( |
552 | 1x |
U[Or](RV[.(term)]) == |
553 | 1x |
.(combine(params_fixed$u_rv, params_indicative$u_rv, TRUE)) |
554 |
), |
|
555 | 1x |
gp = gp |
556 |
) |
|
557 | ||
558 | 1x |
grob_siu <- grid::textGrob( |
559 | 1x |
bquote(SI[RMSE] == .(fmt_pi(stats_longterm$by_type, "si_rmse"))), |
560 | 1x |
gp = gp |
561 |
) |
|
562 | 1x |
grob_sib <- grid::textGrob( |
563 | 1x |
bquote(SI[B] == .(fmt_pi(stats_longterm$by_type, "si_bias"))), |
564 | 1x |
gp = gp |
565 |
) |
|
566 | 1x |
grob_sir <- grid::textGrob( |
567 | 1x |
bquote(SI[R] == .(fmt_pi(stats_longterm$by_type, "si_cor"))), |
568 | 1x |
gp = gp |
569 |
) |
|
570 | 1x |
grob_sis <- grid::textGrob( |
571 | 1x |
bquote(SI[SD] == .(fmt_pi(stats_longterm$by_type, "si_sd"))), |
572 | 1x |
gp = gp |
573 |
) |
|
574 | ||
575 | 1x |
plt <- |
576 | 1x |
stats_longterm$by_site |> |
577 | 1x |
ggplot2::ggplot(ggplot2::aes( |
578 | 1x |
x = .data$obs, |
579 | 1x |
y = .data$mod |
580 |
)) + |
|
581 | 1x |
ggplot2::coord_equal(xlim = c(0, max_om), ylim = c(0, max_om)) + |
582 | 1x |
ggplot2::geom_ribbon( |
583 | 1x |
data = uncer, |
584 | 1x |
ggplot2::aes( |
585 | 1x |
ymin = .data$obs - .data$ar, |
586 | 1x |
ymax = .data$obs + .data$ar, |
587 | 1x |
x = .data$obs, |
588 | 1x |
fill = .data$type |
589 |
), |
|
590 | 1x |
inherit.aes = F, |
591 | 1x |
show.legend = dplyr::n_distinct(stats_longterm$by_site$type) > 1L, |
592 | 1x |
alpha = 0.15 |
593 |
) + |
|
594 | 1x |
ggplot2::geom_abline(color = color_outline) + |
595 | 1x |
ggplot2::geom_line( |
596 | 1x |
data = uncer, |
597 | 1x |
ggplot2::aes( |
598 | 1x |
x = .data$obs, |
599 | 1x |
y = .data$obs + .data$ar, |
600 | 1x |
color = .data$type |
601 |
), |
|
602 | 1x |
show.legend = FALSE |
603 |
) + |
|
604 | 1x |
ggplot2::geom_line( |
605 | 1x |
data = uncer, |
606 | 1x |
ggplot2::aes( |
607 | 1x |
x = .data$obs, |
608 | 1x |
y = .data$obs - .data$ar, |
609 | 1x |
color = .data$type |
610 |
), |
|
611 | 1x |
show.legend = FALSE |
612 |
) |
|
613 | ||
614 | 1x |
if (show_dashed) { |
615 | ! |
plt <- |
616 | ! |
plt + |
617 | ! |
ggplot2::geom_line( |
618 | ! |
data = uncer, |
619 | ! |
ggplot2::aes( |
620 | ! |
x = .data$obs, |
621 | ! |
y = .data$obs + .data$u, |
622 | ! |
color = .data$type |
623 |
), |
|
624 | ! |
show.legend = FALSE, |
625 | ! |
lty = 2 |
626 |
) + |
|
627 | ! |
ggplot2::geom_line( |
628 | ! |
data = uncer, |
629 | ! |
ggplot2::aes( |
630 | ! |
x = .data$obs, |
631 | ! |
y = .data$obs - .data$u, |
632 | ! |
color = .data$type |
633 |
), |
|
634 | ! |
show.legend = FALSE, |
635 | ! |
lty = 2 |
636 |
) |
|
637 |
} |
|
638 | ||
639 | 1x |
plt <- |
640 | 1x |
plt + |
641 | 1x |
ggplot2::geom_point( |
642 | 1x |
size = 3, |
643 | 1x |
ggplot2::aes(color = .data$type), |
644 | 1x |
show.legend = FALSE |
645 |
) + |
|
646 | 1x |
ggplot2::scale_x_continuous(expand = ggplot2::expansion(c(0, .1))) + |
647 | 1x |
ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) + |
648 | 1x |
ggplot2::theme_bw(14) + |
649 | 1x |
ggplot2::labs( |
650 | 1x |
x = format_text("observed concentrations (ug/m3)"), |
651 | 1x |
y = format_text("modelled concentrations (ug/m3)"), |
652 | 1x |
fill = NULL, |
653 | 1x |
color = NULL |
654 |
) + |
|
655 | 1x |
ggplot2::theme( |
656 | 1x |
legend.justification = "left", |
657 | 1x |
legend.spacing = ggplot2::unit(0.2, "cm"), |
658 | 1x |
legend.margin = ggplot2::margin(0, 0, 0, 0, unit = "cm"), |
659 | 1x |
legend.title = ggplot2::element_text(face = "bold") |
660 |
) + |
|
661 | 1x |
ggplot2::scale_color_manual( |
662 | 1x |
values = c( |
663 | 1x |
"fixed" = color_fixed, |
664 | 1x |
"indicative" = color_indicative |
665 |
), |
|
666 | 1x |
aesthetics = c("fill", "colour") |
667 |
) + |
|
668 | 1x |
ggplot2::guides( |
669 | 1x |
fill = ggplot2::guide_legend(position = "top"), |
670 | 1x |
custom_mqi = ggplot2::guide_custom(grob_mqi, order = 1), |
671 | 1x |
custom_a = ggplot2::guide_custom( |
672 | 1x |
grob_a, |
673 | 1x |
order = 2, |
674 | 1x |
title = "\nUncertainty\nParameters" |
675 |
), |
|
676 | 1x |
custom_b = ggplot2::guide_custom(grob_b, order = 3), |
677 | 1x |
custom_rv = ggplot2::guide_custom(grob_rv, order = 4), |
678 | 1x |
custom_urv = ggplot2::guide_custom(grob_urv, order = 5), |
679 | 1x |
custom_siu = ggplot2::guide_custom( |
680 | 1x |
grob_siu, |
681 | 1x |
order = 6, |
682 | 1x |
title = "\nComplementary\nIndicators" |
683 |
), |
|
684 | 1x |
custom_sib = ggplot2::guide_custom(grob_sib, order = 7), |
685 | 1x |
custom_sir = ggplot2::guide_custom(grob_sir, order = 8), |
686 | 1x |
custom_sis = ggplot2::guide_custom(grob_sis, order = 9) |
687 |
) |
|
688 |
} |
|
689 |
} |
|
690 | ||
691 | 6x |
return(plt) |
692 |
} |
|
693 | ||
694 |
#' @author 'Trevor' on StackOverflow |
|
695 |
#' @source https://stackoverflow.com/questions/6862742/draw-a-circle-with-ggplot2 |
|
696 |
#' @noRd |
|
697 |
make_circle <- function(r, xc = 0, yc = 0) { |
|
698 | 12x |
x <- xc + r * cos(seq(0, pi, length.out = 100)) |
699 | 12x |
ymax <- yc + r * sin(seq(0, pi, length.out = 100)) |
700 | 12x |
ymin <- yc + r * sin(seq(0, -pi, length.out = 100)) |
701 | 12x |
data.frame(x = x, ymin = ymin, ymax = ymax) |
702 |
} |
|
703 | ||
704 |
#' Comma delimit two optional values |
|
705 |
#' @noRd |
|
706 |
combine <- function(a, b, percentage = FALSE) { |
|
707 | 24x |
if (percentage) { |
708 | 3x |
if (!is.null(a)) { |
709 | 3x |
a <- paste0(a * 100, "%") |
710 |
} |
|
711 | 3x |
if (!is.null(b)) { |
712 | 1x |
b <- paste0(b * 100, "%") |
713 |
} |
|
714 |
} |
|
715 | 24x |
paste(c(a, b) |> unique(), collapse = ", ") |
716 |
} |
|
717 | ||
718 |
#' Format performance indicators |
|
719 |
#' @noRd |
|
720 |
fmt_pi <- function(vec, x) { |
|
721 | 20x |
paste(round(vec[[x]], 2), collapse = ", ") |
722 |
} |
1 |
#' Conveniently construct a set of parameters for use in |
|
2 |
#' [mqor][mqor-package] functions |
|
3 |
#' |
|
4 |
#' This is a convenience function for assembling complete sets of parameters for |
|
5 |
#' use across `mqor`. It allows parameter sets to be simply re-used across |
|
6 |
#' functions. This function simply returns an R list which is indexed by |
|
7 |
#' `mqor` functions, meaning that users could construct the themselves, or even |
|
8 |
#' pass a row of a dataframe in its stead. |
|
9 |
#' |
|
10 |
#' @param rv **Reference value.** A numeric value, used to define the |
|
11 |
#' relationship between measurement uncertainty and concentration level, for |
|
12 |
#' each pollutant and time aggregation. `rv` is expressed in the same units as |
|
13 |
#' the concentration of the pollutant. |
|
14 |
#' |
|
15 |
#' @param u_rv **Maximum accepted relative measurement uncertainty.** A numeric |
|
16 |
#' value, expressed as a percent (`0.1` is equivalent to 10%). |
|
17 |
#' |
|
18 |
#' @param a **The alpha parameter.** Fraction of the maximum accepted |
|
19 |
#' measurement uncertainty that is not proportional to the concentration, |
|
20 |
#' ranging between `0` and `1`. It is pollutant and measurement-type specific. |
|
21 |
#' |
|
22 |
#' @param b **The beta parameter.** Ratio between the maximum accepted modelling |
|
23 |
#' and measurement uncertainties for short- and long-term averages. It is |
|
24 |
#' pollutant and measurement-type specific, and it determines the stringency |
|
25 |
#' of the MQO. The stringency factor β corresponds to the maximum ratio of |
|
26 |
#' uncertainty of modelling applications as specified in the AAQD. |
|
27 |
#' |
|
28 |
#' @returns a list of length `5`, with names `term`, `rv`, `u_rv`, `a`, and `b`. |
|
29 |
#' |
|
30 |
#' @family parameter constructors |
|
31 |
#' |
|
32 |
#' @export |
|
33 |
mqo_params <- function( |
|
34 |
rv = NULL, |
|
35 |
u_rv = NULL, |
|
36 |
a = NULL, |
|
37 |
b = NULL |
|
38 |
) { |
|
39 |
if ( |
|
40 | 12x |
is.null(rv) || |
41 | 12x |
is.null(u_rv) || |
42 | 12x |
is.null(a) || |
43 | 12x |
is.null(b) |
44 |
) { |
|
45 | ! |
cli::cli_abort( |
46 | ! |
c( |
47 | ! |
"i" = "Please pass a complete set of analysis parameters using {.fun mqor::mqo_params} to the {.field params} argument." |
48 |
), |
|
49 | ! |
call = NULL |
50 |
) |
|
51 |
} |
|
52 | ||
53 | 12x |
list( |
54 | 12x |
rv = rv, |
55 | 12x |
u_rv = u_rv, |
56 | 12x |
a = a, |
57 | 12x |
b = b |
58 |
) |
|
59 |
} |
|
60 | ||
61 |
#' Access 'default' parameters for use in [mqor][mqor-package] functions |
|
62 |
#' |
|
63 |
#' This function provides an alternative to [mqo_params()] by allowing users to |
|
64 |
#' simply filter the [default_params] dataset to create a [mqo_params()]-like |
|
65 |
#' output suitable for the `params` argument of other `mqor` functions. |
|
66 |
#' |
|
67 |
#' @param term,type,pollutant Any of the options present in |
|
68 |
#' `default_params$term`, `default_params$type` or `default_params$pollutant`. |
|
69 |
#' Used to filter the [default_params] dataset. |
|
70 |
#' |
|
71 |
#' @param resolution An optional value present in `default_params$time`, used to |
|
72 |
#' distinguish between parameters when multiple short-term time averages are |
|
73 |
#' present. |
|
74 |
#' |
|
75 |
#' @returns a list of length `5`, with names `term`, `rv`, `u_rv`, `a`, and `b`. |
|
76 |
#' |
|
77 |
#' @family parameter constructors |
|
78 |
#' |
|
79 |
#' @export |
|
80 |
mqo_params_default <- function( |
|
81 |
term = "", |
|
82 |
type = "", |
|
83 |
pollutant = "", |
|
84 |
resolution = NULL |
|
85 |
) { |
|
86 | 12x |
i_term <- rlang::arg_match(term, unique(mqor::default_params$term)) |
87 | 12x |
i_type <- rlang::arg_match(type, unique(mqor::default_params$type)) |
88 | 12x |
pollutant <- tolower(pollutant) |
89 | 12x |
pollutant[pollutant == "pm25"] <- "pm2.5" |
90 | ||
91 | 12x |
if (!missing(pollutant) && !pollutant %in% mqor::default_params$pollutant) { |
92 | ! |
cli::cli_abort( |
93 | ! |
"Cannot identify {.field pollutant}; please use {.fun mqor::mqo_params} or {.fun mqor::mqo_params_default} to construct a parameter set." |
94 |
) |
|
95 |
} |
|
96 | ||
97 | 12x |
i_pollutant <- rlang::arg_match( |
98 | 12x |
pollutant, |
99 | 12x |
unique(mqor::default_params$pollutant) |
100 |
) |
|
101 | ||
102 | 12x |
out <- |
103 | 12x |
dplyr::filter( |
104 | 12x |
mqor::default_params, |
105 | 12x |
term == i_term, |
106 | 12x |
type == i_type, |
107 | 12x |
pollutant == i_pollutant |
108 |
) |
|
109 | ||
110 | 12x |
if (nrow(out) > 1) { |
111 | ! |
if (is.null(resolution)) { |
112 | ! |
cli::cli_abort( |
113 | ! |
"Multiple possible parameters; please provide one of {.code {out$time}} to {.field resolution}." |
114 |
) |
|
115 |
} else { |
|
116 | ! |
rlang::arg_match(resolution, out$time) |
117 | ! |
out <- dplyr::filter(out, .data$time == resolution) |
118 |
} |
|
119 |
} |
|
120 | ||
121 | 12x |
mqo_params( |
122 | 12x |
rv = out$rv, |
123 | 12x |
u_rv = out$u_rv, |
124 | 12x |
a = out$a, |
125 | 12x |
b = out$b |
126 |
) |
|
127 |
} |
|
128 | ||
129 |
#' Convenience function for labelling columns in input `data` |
|
130 |
#' |
|
131 |
#' This function provides an interface to easily create a 'data dictionary' so |
|
132 |
#' that `{mqor}` functions can correctly identify where observed values, |
|
133 |
#' modelled values, site IDs, etc. are found in an input `data.frame`. |
|
134 |
#' |
|
135 |
#' @param obs,mod A character string to identify the columns associated with the |
|
136 |
#' observed (measured) (`obs`) and modelled (`mod`) concentrations. These will |
|
137 |
#' be used alongside `params` to calculate relevant metrics. |
|
138 |
#' |
|
139 |
#' @param site A character string to identify the column to group `obs` and |
|
140 |
#' `mod` by when calculating statistics. This does not strictly have to be a |
|
141 |
#' site name; a site ID or abbreviation would suffice, as long as the labels |
|
142 |
#' uniquely define separate sampling points. |
|
143 |
#' |
|
144 |
#' @param type A character string to identify the column to assign sites as |
|
145 |
#' either `"fixed"` or `"indicative"`. |
|
146 |
#' |
|
147 |
#' @param pollutant A character string to identify the column which |
|
148 |
#' distinguishes between different pollutant species. This column will be used |
|
149 |
#' to filter the data before it is calculated. |
|
150 |
#' |
|
151 |
#' @param date A character to identify a `POSIXct`/`Date` column. Only relevant |
|
152 |
#' for short-term data. |
|
153 |
#' |
|
154 |
#' @returns a list of length `5`, with names `obs`, `mod`, `site`, `type`, |
|
155 |
#' `pollutant` and `date`. |
|
156 |
#' |
|
157 |
#' @export |
|
158 |
mqo_dict <- function( |
|
159 |
obs = "obs", |
|
160 |
mod = "mod", |
|
161 |
site = "site", |
|
162 |
type = "type", |
|
163 |
pollutant = "param", |
|
164 |
date = "date" |
|
165 |
) { |
|
166 | 14x |
list( |
167 | 14x |
obs = obs, |
168 | 14x |
mod = mod, |
169 | 14x |
site = site, |
170 | 14x |
type = type, |
171 | 14x |
pollutant = pollutant, |
172 | 14x |
date = date |
173 |
) |
|
174 |
} |
1 |
#' Read Short- and Long-Term data in the recommended MQOR data structure |
|
2 |
#' |
|
3 |
#' This function reads one or more data files as well as an optional attributes |
|
4 |
#' file, combines them together, and formats them in a way useful for the |
|
5 |
#' `{mqor}` package. Short- and Long-Term data will be distinguished using the |
|
6 |
#' presence of the `"Month"` column, which should only exist in short-term data. |
|
7 |
#' |
|
8 |
#' @param file_data Path(s) to one or more CSV files containing short- or |
|
9 |
#' long-term data, formatted as requested by the DELTA tool. A mix of short- |
|
10 |
#' and long- term data will fail. |
|
11 |
#' |
|
12 |
#' @param file_attributes An optional path to an 'attributes' CSV file, which |
|
13 |
#' must contain a `"Time_Series"` column used to join the two datasets |
|
14 |
#' together. The attributes data can contain metadata about the main datasets, |
|
15 |
#' such as the name of the model, monitoring station coordinates, and so on. |
|
16 |
#' |
|
17 |
#' @param delim The delimiter used to separate columns in the input files. The |
|
18 |
#' default is `";"`. |
|
19 |
#' |
|
20 |
#' @author Jack Davison |
|
21 |
#' |
|
22 |
#' @export |
|
23 |
read_mqor <- function( |
|
24 |
file_data, |
|
25 |
file_attributes = NULL, |
|
26 |
delim = ";" |
|
27 |
) { |
|
28 |
# read all data files |
|
29 | ! |
df <- |
30 | ! |
purrr::map( |
31 | ! |
file_data, |
32 | ! |
\(x) { |
33 | ! |
data.table::fread(x, sep = delim) |> |
34 | ! |
dplyr::mutate(Time_Series = basename(x)) |
35 |
} |
|
36 |
) |> |
|
37 | ! |
dplyr::bind_rows() |> |
38 | ! |
tibble::tibble() |
39 | ||
40 |
# is it short or longterm? |
|
41 | ! |
if ("Month" %in% names(df)) { |
42 |
# if hour is missing, will be daily data - add it back here |
|
43 | ! |
if (!"Hour" %in% names(df)) { |
44 | ! |
df$Hour <- 0 |
45 |
} |
|
46 |
# combine y/m/d/h into date |
|
47 | ! |
df <- |
48 | ! |
dplyr::mutate( |
49 | ! |
df, |
50 | ! |
date = lubridate::make_datetime( |
51 | ! |
year = .data$Year, |
52 | ! |
month = .data$Month, |
53 | ! |
day = .data$Day, |
54 | ! |
hour = .data$Hour, |
55 | ! |
tz = "UTC" |
56 |
), |
|
57 | ! |
.after = "Pollutant" |
58 |
) |> |
|
59 | ! |
dplyr::select( |
60 | ! |
-dplyr::any_of(c("Month", "Day", "Hour")) |
61 |
) |
|
62 |
} |
|
63 | ||
64 |
# if attributes given, read in and join |
|
65 | ! |
if (!is.null(file_attributes)) { |
66 | ! |
attributes <- |
67 | ! |
data.table::fread(file_attributes, sep = delim) |> |
68 | ! |
tibble::tibble() |
69 | ||
70 | ! |
df <- |
71 | ! |
dplyr::left_join( |
72 | ! |
df, |
73 | ! |
attributes, |
74 | ! |
by = dplyr::join_by("Time_Series", "Year", "Pollutant", "Stat_Code"), |
75 | ! |
suffix = c("", "__mqorextra"), |
76 | ! |
relationship = "many-to-one" |
77 |
) |> |
|
78 | ! |
dplyr::select( |
79 | ! |
-dplyr::ends_with("__mqorextra") |
80 |
) |> |
|
81 | ! |
dplyr::rename( |
82 | ! |
type = "Measurement_Type" |
83 |
) |
|
84 |
} |
|
85 | ||
86 |
# rename some columns for convenience |
|
87 | ! |
df <- |
88 | ! |
df |> |
89 | ! |
dplyr::rename( |
90 | ! |
stat_code = "Stat_Code", |
91 | ! |
param = "Pollutant", |
92 | ! |
obs = "Observation", |
93 | ! |
mod = "Model" |
94 |
) |> |
|
95 | ! |
dplyr::rename_with(tolower) |
96 | ||
97 |
# if there's a "year" column, make it into "date" |
|
98 | ! |
if (!"date" %in% names(df)) { |
99 | ! |
names(df)[names(df) == "year"] <- "date" |
100 |
} else { |
|
101 | ! |
df$year <- NULL |
102 |
} |
|
103 | ||
104 |
# return data |
|
105 | ! |
return(df) |
106 |
} |
|
107 | ||
108 |
#' Write Short- and Long-Term data in the recommended MQOR data structure |
|
109 |
#' |
|
110 |
#' This function takes a `data.frame` already in R and writes it to the `{mqor}` |
|
111 |
#' flat file structure. |
|
112 |
#' |
|
113 |
#' @inheritParams data.table::fwrite |
|
114 |
#' |
|
115 |
#' @param data A `data.frame` to write to a file. |
|
116 |
#' |
|
117 |
#' @param delim The delimiter used to separate columns in the files. The default |
|
118 |
#' is `";"`. |
|
119 |
#' |
|
120 |
#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary |
|
121 |
#' to specify the columns in the data `{mqor}` should use. |
|
122 |
#' |
|
123 |
#' @author Jack Davison |
|
124 |
#' |
|
125 |
#' @export |
|
126 |
write_mqor <- function(data, file = "", delim = ";", dict = mqor::mqo_dict()) { |
|
127 | ! |
data <- |
128 | ! |
dplyr::select( |
129 | ! |
data, |
130 | ! |
dplyr::all_of(c( |
131 | ! |
dict$site, |
132 | ! |
dict$pollutant, |
133 | ! |
dict$date, |
134 | ! |
dict$obs, |
135 | ! |
dict$mod |
136 |
)) |
|
137 |
) |
|
138 | ||
139 | ! |
names(data)[names(data) == dict$obs] <- "Observation" |
140 | ! |
names(data)[names(data) == dict$mod] <- "Model" |
141 | ! |
names(data)[names(data) == dict$site] <- "Stat_Code" |
142 | ! |
names(data)[names(data) == dict$pollutant] <- "Pollutant" |
143 | ||
144 | ! |
if (inherits(data[[dict$date]], c("POSIXct", "Date"))) { |
145 | ! |
data <- |
146 | ! |
dplyr::mutate( |
147 | ! |
data, |
148 | ! |
Year = lubridate::year(.data[[dict$date]]), |
149 | ! |
Month = lubridate::month(.data[[dict$date]]), |
150 | ! |
Day = lubridate::day(.data[[dict$date]]), |
151 | ! |
Hour = lubridate::hour(.data[[dict$date]]), |
152 | ! |
.after = "Pollutant", |
153 | ! |
.keep = "unused" |
154 |
) |
|
155 | ||
156 | ! |
if (all(data$Hour == 0)) { |
157 | ! |
data$Hour <- NULL |
158 |
} |
|
159 | ! |
} else if (is.numeric(data[[dict$date]])) { |
160 | ! |
names(data)[names(data) == dict$date] <- "Year" |
161 |
} |
|
162 | ||
163 | ! |
data.table::fwrite(data, file = file, sep = delim) |
164 | ! |
return(invisible(data)) |
165 |
} |
1 |
#' Automatic text formatting for mqor |
|
2 |
#' |
|
3 |
#' This function automatically formats character strings for use in plotting, |
|
4 |
#' applying subscripts, superscripts and Greek letters as appropriate. |
|
5 |
#' |
|
6 |
#' @param text A character vector |
|
7 |
#' @returns an expression for graphical evaluation |
|
8 |
#' @author Adapted from `openair::quickText()`. Original function authored by |
|
9 |
#' Karl Ropkins. |
|
10 |
#' |
|
11 |
#' @noRd |
|
12 |
format_text <- function(text) { |
|
13 | 4x |
ans <- paste("expression(paste('", text, " ", sep = "") |
14 | 4x |
ans <- gsub("NO2", "' 'NO' [2] * '", ans) |
15 | 4x |
ans <- gsub("no2", "' 'NO' [2] * '", ans) |
16 | 4x |
ans <- gsub("\\bno\\b", "NO", ans) |
17 | 4x |
ans <- gsub("\\bNOX\\b", "' 'NO' [x] * '", ans) |
18 | 4x |
ans <- gsub("\\bnox\\b", "' 'NO' [x] * '", ans) |
19 | 4x |
ans <- gsub("\\bNOx\\b", "' 'NO' [x] * '", ans) |
20 | 4x |
ans <- gsub("NH3", "' 'NH' [3] * '", ans) |
21 | 4x |
ans <- gsub("nh3", "' 'NH' [3] * '", ans) |
22 | 4x |
ans <- gsub("co ", "' 'CO ' '", ans) |
23 | 4x |
ans <- gsub("co,", "' 'CO,' '", ans) |
24 | 4x |
ans <- gsub("nmhc", "' 'NMHC' '", ans) |
25 | ||
26 | 4x |
ans <- if (nchar(as.character(text)) == 2 && length(grep("ws", text)) > 0) { |
27 | ! |
gsub("ws", "' 'wind spd.' '", ans) |
28 |
} else { |
|
29 | 4x |
ans |
30 |
} |
|
31 | 4x |
ans <- gsub("wd", "' 'wind dir.' '", ans) |
32 | 4x |
ans <- gsub("rh ", "' 'relative humidity' '", ans) |
33 | 4x |
ans <- gsub("PM10", "' 'PM' [10] * '", ans) |
34 | 4x |
ans <- gsub("pm10", "' 'PM' [10] * '", ans) |
35 | 4x |
ans <- gsub("pm1", "' 'PM' [1] * '", ans) |
36 | 4x |
ans <- gsub("PM1", "' 'PM' [1] * '", ans) |
37 | 4x |
ans <- gsub("PM4", "' 'PM' [4] * '", ans) |
38 | 4x |
ans <- gsub("pm4", "' 'PM' [4] * '", ans) |
39 | 4x |
ans <- gsub("PMtot", "' 'PM' [total] * '", ans) |
40 | 4x |
ans <- gsub("pmtot", "' 'PM' [total] * '", ans) |
41 | 4x |
ans <- gsub("pmc", "' 'PM' [coarse] * '", ans) |
42 | ||
43 | 4x |
ans <- gsub("pmcoarse", "' 'PM' [coarse] * '", ans) |
44 | 4x |
ans <- gsub("PMc", "' 'PM' [coarse] * '", ans) |
45 | 4x |
ans <- gsub("PMcoarse", "' 'PM' [coarse] * '", ans) |
46 | 4x |
ans <- gsub("pmf", "' 'PM' [fine] * '", ans) |
47 | 4x |
ans <- gsub("pmfine", "' 'PM' [fine] * '", ans) |
48 | 4x |
ans <- gsub("PMf", "' 'PM' [fine] * '", ans) |
49 | 4x |
ans <- gsub("PMfine", "' 'PM' [fine] * '", ans) |
50 | 4x |
ans <- gsub("PM2.5", "' 'PM' [2.5] * '", ans) |
51 | 4x |
ans <- gsub("pm2.5", "' 'PM' [2.5] * '", ans) |
52 | 4x |
ans <- gsub("pm25", "' 'PM' [2.5] * '", ans) |
53 | 4x |
ans <- gsub("PM2.5", "' 'PM' [2.5] * '", ans) |
54 | 4x |
ans <- gsub("PM25", "' 'PM' [2.5] * '", ans) |
55 | 4x |
ans <- gsub("pm25", "' 'PM' [2.5] * '", ans) |
56 | 4x |
ans <- gsub("O3", "' 'O' [3] * '", ans) |
57 | 4x |
ans <- gsub("o3", "' 'O' [3] * '", ans) |
58 | 4x |
ans <- gsub("ozone", "' 'O' [3] * '", ans) |
59 | 4x |
ans <- gsub("CO2", "' 'CO' [2] * '", ans) |
60 | 4x |
ans <- gsub("co2", "' 'CO' [2] * '", ans) |
61 | 4x |
ans <- gsub("SO2", "' 'SO' [2] * '", ans) |
62 | 4x |
ans <- gsub("so2", "' 'SO' [2] * '", ans) |
63 | 4x |
ans <- gsub("H2S", "' 'H' [2] * 'S''", ans) |
64 | 4x |
ans <- gsub("h2s", "' 'H' [2] * 'S''", ans) |
65 | 4x |
ans <- gsub("CH4", "' 'CH' [4] * '", ans) |
66 | 4x |
ans <- gsub("ch4", "' 'CH' [4] * '", ans) |
67 | 4x |
ans <- gsub("ug/m3", "' * mu * 'g m' ^-3 *'", ans) |
68 | 4x |
ans <- gsub("ug.m-3", "' * mu * 'g m' ^-3 *'", ans) |
69 | 4x |
ans <- gsub("ug m-3", "' * mu * 'g m' ^-3 *'", ans) |
70 | 4x |
ans <- gsub("ugm-3", "' * mu * 'g m' ^-3 *'", ans) |
71 | 4x |
ans <- gsub("mg/m3", "' * 'm' * 'g m' ^-3 *'", ans) |
72 | 4x |
ans <- gsub("mg.m-3", "' * 'm' * 'g m' ^-3 *'", ans) |
73 | 4x |
ans <- gsub("mg m-3", "' * 'm' * 'g m' ^-3 *'", ans) |
74 | 4x |
ans <- gsub("mgm-3", "' * 'm' * 'g m' ^-3 *'", ans) |
75 | 4x |
ans <- gsub("ng/m3", "' * 'n' * 'g m' ^-3 *'", ans) |
76 | 4x |
ans <- gsub("ng.m-3", "' * 'n' * 'g m' ^-3 *'", ans) |
77 | 4x |
ans <- gsub("ng m-3", "' * 'n' * 'g m' ^-3 *'", ans) |
78 | 4x |
ans <- gsub("ngm-3", "' * 'n' * 'g m' ^-3 *'", ans) |
79 | 4x |
ans <- gsub("m/s2", "' 'm s' ^-2 *'", ans) |
80 | 4x |
ans <- gsub("m/s", "' 'm s' ^-1 *'", ans) |
81 | 4x |
ans <- gsub("m.s-1", "' 'm s' ^-1 *'", ans) |
82 | 4x |
ans <- gsub("m s-1", "' 'm s' ^-1 *'", ans) |
83 | 4x |
ans <- gsub("km2", "' 'km' ^2 *'", ans) |
84 | 4x |
ans <- gsub("g/km", "' 'g km' ^-1 *'", ans) |
85 | 4x |
ans <- gsub("g/s", "' 'g s' ^-1 *'", ans) |
86 | 4x |
ans <- gsub("kW/t", "' 'kW t' ^-1 *'", ans) |
87 | 4x |
ans <- gsub("g/hour", "' 'g hour' ^-1 *'", ans) |
88 | 4x |
ans <- gsub("g/hr", "' 'g hour' ^-1 *'", ans) |
89 | 4x |
ans <- gsub("g/m3", "' 'g m' ^-3 *'", ans) |
90 | 4x |
ans <- gsub("g/kg", "' 'g kg' ^-1 *'", ans) |
91 | 4x |
ans <- gsub("cm-3", "' 'cm' ^-3 *'", ans) |
92 | 4x |
ans <- gsub("km/hr/s", "' 'km hr' ^-1 * ' s' ^-1 *'", ans) |
93 | 4x |
ans <- gsub("km/hour/s", "' 'km hr' ^-1 * ' s' ^-1 *'", ans) |
94 | 4x |
ans <- gsub("km/h/s", "km hr' ^-1 * ' s' ^-1 *'", ans) |
95 | 4x |
ans <- gsub("km/hr", "' 'km hr' ^-1 *'", ans) |
96 | 4x |
ans <- gsub("km/h", "' 'km hr' ^-1 *'", ans) |
97 | 4x |
ans <- gsub("km/hour", "' 'km hr' ^-1 *'", ans) |
98 | ||
99 | 4x |
ans <- paste(ans, "'))", sep = "") |
100 | ||
101 | 4x |
if (substr(ans, (nchar(ans) - 8), (nchar(ans) - 6)) == "] *") { |
102 | ! |
a <- ans |
103 | ! |
ans <- paste( |
104 | ! |
substr(a, 1, (nchar(a) - 7)), |
105 | ! |
substr(a, (nchar(a) - 5), nchar(a)), |
106 | ! |
sep = "" |
107 |
) |
|
108 |
} |
|
109 | ||
110 | 4x |
ans <- gsub("''", "", ans) |
111 | 4x |
ans <- gsub("' '", "", ans) |
112 | 4x |
ans <- gsub("\\* \\*", "~", ans) |
113 | 4x |
ans <- gsub("^expression\\(paste\\( \\*", "expression(paste(", ans) |
114 | 4x |
ans <- gsub("^expression\\(paste\\(\\*", "expression(paste(", ans) |
115 | ||
116 | 4x |
if (substr(ans, (nchar(ans) - 2), (nchar(ans) - 2)) == "*") { |
117 | ! |
a <- ans |
118 | ! |
ans <- paste( |
119 | ! |
substr(a, 1, (nchar(a) - 2)), |
120 |
" ' ' ", |
|
121 | ! |
substr(a, (nchar(a) - 1), nchar(a)), |
122 | ! |
sep = "" |
123 |
) |
|
124 |
} |
|
125 | ||
126 | 4x |
if (inherits(try(eval(parse(text = ans)), TRUE), "try-error") == FALSE) { |
127 | 4x |
ans <- eval(parse(text = ans)) |
128 |
} else { |
|
129 | ! |
ans <- text |
130 |
} |
|
131 | ||
132 | 4x |
return(ans) |
133 |
} |
1 |
#' Function to read CDF files in the DELTA format |
|
2 |
#' |
|
3 |
#' This function uses [ncdf4][ncdf4::ncdf4-package] to read a CDF file of |
|
4 |
#' short-term air quality data into R as a `data.frame` for further analysis in |
|
5 |
#' an `{mqor}` analysis pipeline. Alternatively, users can define a directory of |
|
6 |
#' delimited text files with [read_delta_data_delim()]. |
|
7 |
#' |
|
8 |
#' @param file A path to a .cdf file, formatted as requested by the DELTA tool. |
|
9 |
#' |
|
10 |
#' @param data_type One of `"obs"` or `"mod"`; used to correctly label the data |
|
11 |
#' as being observations or modelled data for later analysis. |
|
12 |
#' |
|
13 |
#' @return a [tibble][tibble::tibble-package] |
|
14 |
#' |
|
15 |
#' @family delta reading functions |
|
16 |
#' |
|
17 |
#' @export |
|
18 |
read_delta_data_cdf <- function(file, data_type) { |
|
19 | ! |
rlang::check_installed("ncdf4") |
20 | ||
21 | ! |
data_type <- rlang::arg_match(data_type, c("obs", "mod")) |
22 | ||
23 | ! |
nc <- ncdf4::nc_open(file) |
24 | ||
25 | ! |
model <- gsub(".cdf", "", basename(file)) |
26 | ||
27 | ! |
on.exit(ncdf4::nc_close(nc)) |
28 | ||
29 | ! |
params <- |
30 | ! |
ncdf4::ncatt_get(nc, varid = 0)$Parameters |> |
31 | ! |
as.raw() |> |
32 | ! |
rawToChar() |> |
33 | ! |
strsplit(" ") |> |
34 | ! |
(\(x) x[[1]])() |
35 | ||
36 | ! |
vars <- names(nc$var) |
37 | ||
38 | ! |
year <- ncdf4::ncatt_get(nc, 0)$Year |
39 | ! |
start <- ncdf4::ncatt_get(nc, 0)$StartHour + 1L |
40 | ! |
end <- ncdf4::ncatt_get(nc, 0)$EndHour + 1L |
41 | ||
42 | ! |
dates <- |
43 | ! |
seq( |
44 | ! |
ISOdate( |
45 | ! |
year = year, |
46 | ! |
month = 1, |
47 | ! |
day = 1, |
48 | ! |
hour = 0, |
49 | ! |
tz = "GMT" |
50 |
), |
|
51 | ! |
ISOdate( |
52 | ! |
year = year, |
53 | ! |
month = 12, |
54 | ! |
day = 31, |
55 | ! |
hour = 23, |
56 | ! |
tz = "GMT" |
57 |
), |
|
58 | ! |
by = "hour" |
59 | ! |
)[start:end] |
60 | ||
61 | ! |
data <- |
62 | ! |
purrr::map( |
63 | ! |
vars, |
64 | ! |
function(x) { |
65 | ! |
array <- ncdf4::ncvar_get(nc, x) |
66 | ! |
array |> |
67 | ! |
t() |> |
68 | ! |
as.data.frame() |> |
69 | ! |
stats::setNames(params) |> |
70 | ! |
dplyr::tibble() |> |
71 | ! |
dplyr::mutate(site = x, date = dates, .before = 0L) |
72 |
}, |
|
73 | ! |
.progress = TRUE |
74 |
) |> |
|
75 | ! |
dplyr::bind_rows() |> |
76 | ! |
tidyr::pivot_longer( |
77 | ! |
-c("site", "date"), |
78 | ! |
names_to = "param", |
79 | ! |
values_to = data_type |
80 |
) |
|
81 | ||
82 |
# drop invalid values |
|
83 | ! |
data[data_type][data[data_type] == -999] <- NA |
84 | ||
85 | ! |
if (data_type == "mod") { |
86 |
# if modelled data, add a 'model' column |
|
87 | ! |
data <- dplyr::mutate(data, model = model, .before = dplyr::everything()) |
88 |
} |
|
89 | ||
90 | ! |
return(data) |
91 |
} |
|
92 | ||
93 |
#' Function to read CSV files in the DELTA format |
|
94 |
#' |
|
95 |
#' This function uses [data.table::fread()] to read a directory of tabular files |
|
96 |
#' of short-term air quality data into R as a `data.frame` for further analysis |
|
97 |
#' in an `{mqor}` analysis pipeline. Alternatively, users can read a single |
|
98 |
#' netCDF file with [read_delta_data_cdf()]. |
|
99 |
#' |
|
100 |
#' @param path A path to a directory of delimited files, formatted as requested |
|
101 |
#' by the DELTA tool. |
|
102 |
#' @param data_type One of `"obs"` or `"mod"`; used to correctly label the data |
|
103 |
#' as being observations or modelled data for later analysis. |
|
104 |
#' @param model_name If `data_type = "mod"`, this value will be used to assign a |
|
105 |
#' model name to the data. |
|
106 |
#' @param delim The delimited used to separate columns in the input files. The |
|
107 |
#' default, `;`, is prescribed by the DELTA tool. |
|
108 |
#' @param pattern Passed to [dir()] to identify relevant files in `path`. Also |
|
109 |
#' used to clean the file names to attach site names to the data frames. |
|
110 |
#' |
|
111 |
#' @return a [tibble][tibble::tibble-package] |
|
112 |
#' |
|
113 |
#' @family delta reading functions |
|
114 |
#' |
|
115 |
#' @export |
|
116 |
read_delta_data_delim <- function( |
|
117 |
path, |
|
118 |
data_type, |
|
119 |
model_name = "MODEL", |
|
120 |
delim = ";", |
|
121 |
pattern = ".csv" |
|
122 |
) { |
|
123 | ! |
data_type <- rlang::arg_match(data_type, c("obs", "mod")) |
124 | ||
125 | ! |
data <- |
126 | ! |
purrr::map2( |
127 | ! |
.x = dir(path, full.names = TRUE, pattern = pattern), |
128 | ! |
.y = dir(path, full.names = FALSE, pattern = pattern), |
129 | ! |
.f = function(x, y) { |
130 | ! |
data.table::fread(x, sep = delim, na.strings = "-999") |> |
131 | ! |
dplyr::tibble() |> |
132 | ! |
dplyr::mutate(site = gsub(pattern, "", y), .before = 0L) |
133 |
}, |
|
134 | ! |
.progress = TRUE |
135 |
) |> |
|
136 | ! |
dplyr::bind_rows() |> |
137 | ! |
dplyr::mutate( |
138 | ! |
date = ISOdate( |
139 | ! |
year = .data$year, |
140 | ! |
month = .data$month, |
141 | ! |
day = .data$day, |
142 | ! |
hour = .data$hour, |
143 | ! |
tz = "GMT" |
144 |
), |
|
145 | ! |
.keep = "unused", |
146 | ! |
.after = "site" |
147 |
) |
|
148 | ||
149 | ! |
for (i in names(data)) { |
150 | ! |
if (all(is.na(data[[i]]))) { |
151 | ! |
data[i] <- NULL |
152 |
} |
|
153 |
} |
|
154 | ||
155 | ! |
data <- |
156 | ! |
tidyr::pivot_longer( |
157 | ! |
data, |
158 | ! |
-c("site", "date"), |
159 | ! |
names_to = "param", |
160 | ! |
values_to = data_type |
161 |
) |
|
162 | ||
163 | ! |
if (data_type == "mod") { |
164 |
# if modelled data, add a 'model' column |
|
165 | ! |
data <- dplyr::mutate( |
166 | ! |
data, |
167 | ! |
model = model_name, |
168 | ! |
.before = dplyr::everything() |
169 |
) |
|
170 |
} |
|
171 | ||
172 | ! |
return(data) |
173 |
} |
|
174 | ||
175 |
#' Function to read yearly CSV files in the DELTA format |
|
176 |
#' |
|
177 |
#' This function reads yearly data formatted as required by the DELTA tool, |
|
178 |
#' structured in a single file. Monitoring data structured in multiple files can |
|
179 |
#' alternatively be imported using [read_delta_yearly_dir()]. |
|
180 |
#' |
|
181 |
#' @param file The path to a single CSV file containing yearly data, formatted |
|
182 |
#' as requested by the DELTA tool. |
|
183 |
#' |
|
184 |
#' @inheritParams read_delta_data_delim |
|
185 |
#' |
|
186 |
#' @return a [tibble][tibble::tibble-package] |
|
187 |
#' |
|
188 |
#' @family delta reading functions |
|
189 |
#' |
|
190 |
#' @export |
|
191 |
read_delta_yearly_file <- function( |
|
192 |
file, |
|
193 |
data_type, |
|
194 |
model_name = "MODEL", |
|
195 |
delim = ";" |
|
196 |
) { |
|
197 | ! |
data_type <- rlang::arg_match(data_type, c("obs", "mod")) |
198 | ||
199 | ! |
lines <- readLines(file) |
200 | ! |
lines <- lines[!startsWith(lines, delim)] |
201 | ||
202 | ! |
colnames <- strsplit(lines[1], ";")[[1]] |
203 | ! |
data <- |
204 | ! |
utils::read.table( |
205 | ! |
text = lines[-1], |
206 | ! |
sep = delim, |
207 | ! |
col.names = c("site", colnames[-(1:2)]) |
208 |
) |> |
|
209 | ! |
dplyr::mutate(date = as.integer(colnames[2])) |> |
210 | ! |
tidyr::pivot_longer( |
211 | ! |
-c("site", "date"), |
212 | ! |
names_to = "param", |
213 | ! |
values_to = data_type |
214 |
) |
|
215 | ||
216 | ! |
if (data_type == "mod") { |
217 |
# if modelled data, add a 'model' column |
|
218 | ! |
data <- dplyr::mutate( |
219 | ! |
data, |
220 | ! |
model = model_name, |
221 | ! |
.before = dplyr::everything() |
222 |
) |
|
223 |
} |
|
224 | ||
225 | ! |
data[[data_type]][data[[data_type]] == -999] <- NA |
226 | ||
227 | ! |
return(data) |
228 |
} |
|
229 | ||
230 |
#' Function to read a directory of yearly CSV files in the DELTA format |
|
231 |
#' |
|
232 |
#' This function reads yearly data formatted as required by the DELTA tool, |
|
233 |
#' structured across multiple files. This format is only supported for |
|
234 |
#' monitoring data. Data structured in a single file can |
|
235 |
#' alternatively be imported using [read_delta_yearly_file()]. |
|
236 |
#' |
|
237 |
#' @inheritParams read_delta_data_delim |
|
238 |
#' |
|
239 |
#' @return a [tibble][tibble::tibble-package] |
|
240 |
#' |
|
241 |
#' @family delta reading functions |
|
242 |
#' |
|
243 |
#' @export |
|
244 |
read_delta_yearly_dir <- function( |
|
245 |
path, |
|
246 |
delim = ";", |
|
247 |
pattern = ".csv" |
|
248 |
) { |
|
249 | ! |
files <- dir(path, full.names = TRUE, pattern = pattern) |
250 | ||
251 | ! |
read_file <- function(file) { |
252 | ! |
lines <- readLines(file) |
253 | ! |
lines <- lines[!startsWith(lines, delim)] |
254 | ||
255 | ! |
colnames <- strsplit(lines[1], ";")[[1]] |
256 | ||
257 | ! |
utils::read.table( |
258 | ! |
text = lines[2], |
259 | ! |
sep = ";", |
260 | ! |
col.names = colnames[-(1:2)] |
261 |
) |> |
|
262 | ! |
dplyr::mutate( |
263 | ! |
date = as.integer(colnames[2]), |
264 | ! |
site = gsub(pattern, "", basename(file)) |
265 |
) |> |
|
266 | ! |
tidyr::pivot_longer( |
267 | ! |
-c("site", "date"), |
268 | ! |
names_to = "param", |
269 | ! |
values_to = "obs" |
270 |
) |> |
|
271 | ! |
dplyr::mutate(obs = ifelse(.data$obs == -999, NA, .data$obs)) |
272 |
} |
|
273 | ||
274 | ! |
purrr::map(files, read_file) |> |
275 | ! |
dplyr::bind_rows() |
276 |
} |
1 |
#' Function to read DELTA tool resource files |
|
2 |
#' |
|
3 |
#' These functions read selected files from the DELTA tool's `resource` |
|
4 |
#' directory. The most useful file for `{mqor}` is likely the `startup` file as |
|
5 |
#' this contains vital monitoring station metadata, but this function also |
|
6 |
#' supports the `MyDeltaInput.ini`, `update.ini` and `init.ini` files for |
|
7 |
#' completeness. |
|
8 |
#' |
|
9 |
#' @param file A path to an appropriate DELTA tool resource file; one of |
|
10 |
#' `init.ini`, `MyDeltaInput.dat`, `startup_*.ini`, or `update.ini`. |
|
11 |
#' |
|
12 |
#' @return a list |
|
13 |
#' |
|
14 |
#' @family delta reading functions |
|
15 |
#' |
|
16 |
#' @export |
|
17 |
read_delta_resource <- function(file) { |
|
18 | ! |
if (grepl("startup", file)) { |
19 | ! |
lines <- readLines(file) |
20 | ||
21 | ! |
id <- which(lines %in% c("[MODEL]", "[PARAMETERS]", "[MONITORING]")) |
22 | ||
23 | ! |
model_lines <- lines[(id[1] + 1L):(id[2] - 1L)] |
24 | ! |
model_lines <- model_lines[!grepl(";", model_lines)] |
25 | ||
26 | ! |
year <- as.integer(model_lines[1]) |
27 | ! |
frequency <- model_lines[2] |
28 | ! |
scale <- model_lines[3] |
29 | ||
30 | ! |
param_lines <- lines[(id[2] + 1L):(id[3] - 1L)] |
31 | ! |
param_lines <- param_lines[!startsWith(param_lines, ";")] |
32 | ! |
params <- data.table::fread(text = c("param;type;unit", param_lines)) |> |
33 | ! |
dplyr::tibble() |
34 | ||
35 | ! |
monitoring_lines <- lines[(id[3] + 1L):length(lines)] |
36 | ! |
monitoring_lines[!endsWith(monitoring_lines, ";")] <- paste0( |
37 | ! |
monitoring_lines[!endsWith(monitoring_lines, ";")], |
38 |
";" |
|
39 |
) |
|
40 | ||
41 | ! |
monitoring <- |
42 | ! |
data.table::fread(text = monitoring_lines) |> |
43 | ! |
dplyr::tibble() |> |
44 | ! |
dplyr::select(-dplyr::any_of("V13")) |> |
45 | ! |
dplyr::rename_with(.fn = function(x) gsub(" ", "_", tolower(x))) |> |
46 | ! |
dplyr::mutate( |
47 | ! |
listofvariables = strsplit(.data$listofvariables, "\\*") |
48 |
) |
|
49 | ||
50 | ! |
out <- list( |
51 | ! |
year = year, |
52 | ! |
frequency = frequency, |
53 | ! |
scale = scale, |
54 | ! |
params = params, |
55 | ! |
monitoring = monitoring |
56 |
) |
|
57 | ! |
} else if (grepl("update.ini", file) || grepl("init.ini", file)) { |
58 | ! |
x <- readLines(file) |
59 | ! |
x <- x[ |
60 | ! |
!startsWith(x, ";") & !startsWith(x, "#") & !startsWith(x, "[") & x != "" |
61 |
] |
|
62 | ! |
out <- |
63 | ! |
strsplit(x, "=") |> |
64 | ! |
purrr::map(.f = \(x) list(x[[2]]) |> stats::setNames(x[[1]])) |> |
65 | ! |
purrr::flatten() |
66 | ! |
} else if (grepl("MyDeltaInput", file)) { |
67 | ! |
x <- readLines(file) |
68 | ! |
x <- x[ |
69 | ! |
!startsWith(x, ";") & !startsWith(x, "#") & !startsWith(x, "[") & x != "" |
70 |
] |
|
71 | ! |
x <- gsub("\\", "/", x, fixed = TRUE) |
72 | ! |
out <- as.list(stats::setNames(x, c("startup", "modeling", "monitoring"))) |
73 |
} else { |
|
74 | ! |
cli::cli_abort( |
75 | ! |
"File not recognised; please provide one of the 'init.ini', 'MyDeltaInput.dat', 'startup_*.ini' or 'update.ini' files found in the DELTA 'resource' directory." |
76 |
) |
|
77 |
} |
|
78 | ||
79 | ! |
out$file <- file |
80 | ||
81 | ! |
return(out) |
82 |
} |
1 |
#' Convert the 'observation' and 'modelled' columns of a `mqor`-ready dataset |
|
2 |
#' into rolling means |
|
3 |
#' |
|
4 |
#' This function takes a dataset prepared for `mqo` calculations and replaces |
|
5 |
#' the `observation` and `modelled` data columns with a rolling equivalent. |
|
6 |
#' Rolling values are calculated *within* groups of any categorical columns |
|
7 |
#' present in the data (likely the "pollutant" and "site" columns, along with |
|
8 |
#' any metadata). |
|
9 |
#' |
|
10 |
#' @inheritParams summarise_mqo_stats |
|
11 |
#' |
|
12 |
#' @param window_size Integer vector giving rolling window size(s). This is the |
|
13 |
#' total number of included values. Defaults to `8L`, which is the most useful |
|
14 |
#' for ozone statistics. |
|
15 |
#' |
|
16 |
#' @param min_coverage The minimum data coverage percent, expressed as a decimal |
|
17 |
#' (i.e., this option should be between `0` and `1`, representing 0% and 100%). |
|
18 |
#' |
|
19 |
#' @param progress Show a progress bar? Passed to [purrr::map()]. |
|
20 |
#' |
|
21 |
#' @seealso `openair::rollingMean()` for a more flexible and performant version |
|
22 |
#' of `mutate_rolling_mean()`. |
|
23 |
#' |
|
24 |
#' @family data utilities |
|
25 |
#' |
|
26 |
#' @export |
|
27 |
mutate_rolling_mean <- function( |
|
28 |
data, |
|
29 |
window_size = 8L, |
|
30 |
min_coverage = 0.75, |
|
31 |
progress = TRUE, |
|
32 |
dict = mqor::mqo_dict() |
|
33 |
) { |
|
34 | ! |
validate_coverage(min_coverage) |
35 | ! |
data <- dplyr::arrange(data, .data[[dict$date]]) |
36 | ||
37 | ! |
cat_columns <- |
38 | ! |
data |> |
39 | ! |
dplyr::select(dplyr::where(is.character) | dplyr::where(is.factor)) |> |
40 | ! |
names() |
41 | ||
42 | ! |
purrr::map( |
43 | ! |
split(data, data[c(cat_columns)], drop = TRUE), |
44 | ! |
\(x) { |
45 | ! |
x[[dict$obs]] <- data.table::frollmean( |
46 | ! |
x = x[[dict$obs]], |
47 | ! |
n = window_size, |
48 | ! |
align = "right" |
49 |
) |
|
50 | ||
51 | ! |
x[[dict$mod]] <- data.table::frollmean( |
52 | ! |
x = x[[dict$mod]], |
53 | ! |
n = window_size, |
54 | ! |
align = "right" |
55 |
) |
|
56 | ||
57 | ! |
flag_obs <- !is.na(x[[dict$obs]]) |
58 | ! |
obs_dc <- data.table::frollmean( |
59 | ! |
x = flag_obs, |
60 | ! |
n = window_size, |
61 | ! |
align = "right" |
62 |
) |
|
63 | ||
64 | ! |
flag_mod <- !is.na(x[[dict$mod]]) |
65 | ! |
mod_dc <- data.table::frollmean( |
66 | ! |
x = flag_mod, |
67 | ! |
n = window_size, |
68 | ! |
align = "right" |
69 |
) |
|
70 | ||
71 | ! |
x[[dict$obs]] <- dplyr::if_else(obs_dc <= min_coverage, NA, x[[dict$obs]]) |
72 | ! |
x[[dict$mod]] <- dplyr::if_else(mod_dc <= min_coverage, NA, x[[dict$mod]]) |
73 | ||
74 | ! |
return(x) |
75 |
}, |
|
76 | ! |
.progress = progress |
77 |
) |> |
|
78 | ! |
dplyr::bind_rows() |
79 |
} |
|
80 | ||
81 |
#' Calculate daily summaries of a `mqor`-ready dataset |
|
82 |
#' |
|
83 |
#' These functions take a dataset prepared for `mqo` calculations and calculates |
|
84 |
#' a daily or annual mean, maximum or minimum value. Averages are calculated |
|
85 |
#' *within* groups of any categorical columns present in the data (likely the |
|
86 |
#' "pollutant" and "site" columns, along with any metadata). |
|
87 |
#' |
|
88 |
#' @inheritParams summarise_mqo_stats |
|
89 |
#' |
|
90 |
#' @param statistic One of `"mean"`, `"min"`, or `"max"`, representing the |
|
91 |
#' average, minimum or maximum |
|
92 |
#' |
|
93 |
#' @param min_coverage The minimum data coverage percent, expressed as a decimal |
|
94 |
#' (i.e., this option should be between `0` and `1`, representing 0% and |
|
95 |
#' 100%). Note that calculations of data coverage can only be calculated for |
|
96 |
#' the data presented; the user must provide a complete timeseries for an |
|
97 |
#' accurate coverage calculation. |
|
98 |
#' |
|
99 |
#' @seealso `openair::timeAverage()` for a more flexible and performant version |
|
100 |
#' of `summarise_daily()`. |
|
101 |
#' |
|
102 |
#' @family data utilities |
|
103 |
#' |
|
104 |
#' @rdname summarise-temporal |
|
105 |
#' @order 1 |
|
106 |
#' @export |
|
107 |
summarise_daily <- function( |
|
108 |
data, |
|
109 |
statistic = "mean", |
|
110 |
min_coverage = 0.75, |
|
111 |
dict = mqor::mqo_dict() |
|
112 |
) { |
|
113 | ! |
summarise_temporal( |
114 | ! |
data = data, |
115 | ! |
statistic = statistic, |
116 | ! |
min_coverage = min_coverage, |
117 | ! |
dict = dict, |
118 | ! |
floor = "day" |
119 |
) |
|
120 |
} |
|
121 | ||
122 |
#' @rdname summarise-temporal |
|
123 |
#' @order 2 |
|
124 |
#' @export |
|
125 |
summarise_annual <- function( |
|
126 |
data, |
|
127 |
statistic = "mean", |
|
128 |
min_coverage = 0.75, |
|
129 |
dict = mqor::mqo_dict() |
|
130 |
) { |
|
131 | ! |
data <- |
132 | ! |
summarise_temporal( |
133 | ! |
data = data, |
134 | ! |
statistic = statistic, |
135 | ! |
min_coverage = min_coverage, |
136 | ! |
dict = dict, |
137 | ! |
floor = "year" |
138 |
) |
|
139 | ||
140 | ! |
data[[dict$date]] <- lubridate::year(data[[dict$date]]) |
141 | ||
142 | ! |
return(data) |
143 |
} |
|
144 | ||
145 |
#' @noRd |
|
146 |
summarise_temporal <- function( |
|
147 |
data, |
|
148 |
statistic, |
|
149 |
min_coverage, |
|
150 |
dict, |
|
151 |
floor = c("day", "year") |
|
152 |
) { |
|
153 | ! |
validate_coverage(min_coverage) |
154 | ! |
fun <- get_fun(statistic) |
155 | ||
156 |
# get categorical columns to summarise by |
|
157 | ! |
cat_columns <- |
158 | ! |
data |> |
159 | ! |
dplyr::select(dplyr::where(is.character) | dplyr::where(is.factor)) |> |
160 | ! |
names() |
161 | ||
162 |
# summarise data |
|
163 | ! |
out <- |
164 | ! |
data |> |
165 | ! |
dplyr::mutate(date = lubridate::floor_date(.data[[dict$date]], floor)) |> |
166 | ! |
dplyr::summarise( |
167 | ! |
obs = fun(.data[[dict$obs]], min_coverage), |
168 | ! |
mod = fun(.data[[dict$mod]], min_coverage), |
169 | ! |
.by = dplyr::all_of(c("date", cat_columns)) |
170 |
) |
|
171 | ||
172 |
# restore original modelled/observed names |
|
173 | ! |
names(out)[names(out) == "mod"] <- dict$mod |
174 | ! |
names(out)[names(out) == "obs"] <- dict$obs |
175 | ! |
names(out)[names(out) == "date"] <- dict$date |
176 | ||
177 |
# return data |
|
178 | ! |
return(out) |
179 |
} |
|
180 | ||
181 | ||
182 |
#' @noRd |
|
183 |
get_fun <- function(statistic) { |
|
184 | ! |
if (statistic == "mean") { |
185 | ! |
fun <- safe_mean |
186 | ! |
} else if (statistic == "max") { |
187 | ! |
fun <- safe_max |
188 | ! |
} else if (statistic == "min") { |
189 | ! |
fun <- safe_min |
190 |
} |
|
191 |
} |
|
192 | ||
193 |
#' @noRd |
|
194 |
safe_mean <- function(x, cov) { |
|
195 | ! |
if (all(is.na(x)) | mean(!is.na(x)) < cov) { |
196 |
NA |
|
197 |
} else { |
|
198 | ! |
mean(x, na.rm = TRUE) |
199 |
} |
|
200 |
} |
|
201 | ||
202 |
#' @noRd |
|
203 |
safe_max <- function(x, cov) { |
|
204 | ! |
if (all(is.na(x)) | mean(!is.na(x)) < cov) { |
205 |
NA |
|
206 |
} else { |
|
207 | ! |
max(x, na.rm = TRUE) |
208 |
} |
|
209 |
} |
|
210 | ||
211 |
#' @noRd |
|
212 |
safe_min <- function(x, cov) { |
|
213 | ! |
if (all(is.na(x)) | mean(!is.na(x)) < cov) { |
214 |
NA |
|
215 |
} else { |
|
216 | ! |
max(x, na.rm = TRUE) |
217 |
} |
|
218 |
} |
|
219 | ||
220 |
#' @noRd |
|
221 |
validate_coverage <- function(min_coverage) { |
|
222 | ! |
if (min_coverage < 0 || min_coverage > 1 || !is.numeric(min_coverage)) { |
223 | ! |
cli::cli_abort( |
224 | ! |
"{.field min_coverage} must be a number between {0L} and {1L}." |
225 |
) |
|
226 |
} |
|
227 |
} |
|
228 | ||
229 |
#' Validate that all observations have a matching modelled value, and vice-versa |
|
230 |
#' |
|
231 |
#' When calculating statistics, it is important that the methodology is applied |
|
232 |
#' to observation-model pairs. This function can help identify and fix this situation. |
|
233 |
#' |
|
234 |
#' @param data A `data.frame` with numeric columns `dict$obs` and `dict$mod`. |
|
235 |
#' |
|
236 |
#' @param mode One of `"fix"` (the default), `"error"` or `"warn"`. If `"fix"` |
|
237 |
#' the function will return a dataframe with any mismatched values set to |
|
238 |
#' `NA`. If `"error"` or `"warn"`, the the function will either error or warn |
|
239 |
#' if mismatched values are detected. |
|
240 |
#' |
|
241 |
#' @inheritParams summarise_mqo_stats |
|
242 |
#' |
|
243 |
#' @author Jack Davison |
|
244 |
#' @family data utilities |
|
245 |
#' @export |
|
246 |
validate_mod_obs_pairs <- function( |
|
247 |
data, |
|
248 |
mode = c("fix", "error", "warn"), |
|
249 |
dict = mqor::mqo_dict() |
|
250 |
) { |
|
251 | ! |
mode <- rlang::arg_match(mode) |
252 | ||
253 | ! |
if (mode == "fix") { |
254 | ! |
data <- |
255 | ! |
dplyr::mutate( |
256 | ! |
data, |
257 | ! |
`__new_obs__` = dplyr::if_else( |
258 | ! |
is.na(.data[[dict$mod]]), |
259 | ! |
NA, |
260 | ! |
.data[[dict$obs]] |
261 |
), |
|
262 | ! |
`__new_mod__` = dplyr::if_else( |
263 | ! |
is.na(.data[[dict$obs]]), |
264 | ! |
NA, |
265 | ! |
.data[[dict$mod]] |
266 |
) |
|
267 |
) |
|
268 | ||
269 | ! |
data[[dict$obs]] <- data[["__new_obs__"]] |
270 | ! |
data[[dict$mod]] <- data[["__new_mod__"]] |
271 | ! |
data[["__new_mod__"]] <- data[["__new_obs__"]] <- NULL |
272 |
} |
|
273 | ||
274 | ! |
if (mode %in% c("error", "warn")) { |
275 | ! |
if (mode == "error") { |
276 | ! |
fun <- cli::cli_abort |
277 |
} else { |
|
278 | ! |
fun <- cli::cli_warn |
279 |
} |
|
280 | ! |
id <- is.na(data[[dict$mod]]) != is.na(data[[dict$obs]]) |
281 | ||
282 | ! |
if (any(id)) { |
283 | ! |
fun( |
284 | ! |
"Some modelled values do not have matching observations, or vice-versa." |
285 |
) |
|
286 |
} |
|
287 |
} |
|
288 | ||
289 | ! |
return(data) |
290 |
} |
1 |
#' Calculate a ranked percentile of a numeric vector |
|
2 |
#' |
|
3 |
#' This function provides a convenient method to calculate a percentile value |
|
4 |
#' from a numeric vector. The vector, `x`, is ranked in ascending order and the |
|
5 |
#' *n*th percentile value is inferred by linear interpolation. |
|
6 |
#' |
|
7 |
#' @param x A numeric vector from which to calculate a percentile value. |
|
8 |
#' |
|
9 |
#' @param quantile A quantile value between `0` and `1`. The default, `0.9`, |
|
10 |
#' calculates the 90th percentile. |
|
11 |
#' |
|
12 |
#' @inheritParams stats::quantile |
|
13 |
#' |
|
14 |
#' @family data utilities |
|
15 |
#' |
|
16 |
#' @author Jack Davison |
|
17 |
#' |
|
18 |
#' @examples |
|
19 |
#' mqis <- seq(0.1, 1, by = 0.1) |
|
20 |
#' |
|
21 |
#' mqo_percentile(mqis) |
|
22 |
#' |
|
23 |
#' mqo_percentile(mqis[1:9]) |
|
24 |
#' |
|
25 |
#' @export |
|
26 |
mqo_percentile <- function(x, quantile = 0.9, na.rm = FALSE) { |
|
27 | 20x |
if (!na.rm & anyNA(x)) { |
28 | 1x |
cli::cli_abort( |
29 | 1x |
"Missing values in {.field x} not allowed if {.field na.rm} is {FALSE}." |
30 |
) |
|
31 |
} |
|
32 | ||
33 | 19x |
if (!dplyr::between(quantile, 0, 1)) { |
34 | 1x |
cli::cli_abort("{.field quantile} must be between {0} and {1}.") |
35 |
} |
|
36 | ||
37 | 18x |
x <- x[!is.na(x)] |
38 | ||
39 | 18x |
x <- sort(x) |
40 | ||
41 | 18x |
n <- floor(length(x) * quantile) |
42 | ||
43 | 18x |
d <- (length(x) * quantile) - n |
44 | ||
45 | 18x |
return(x[n] + (x[n + 1] - x[n]) * d) |
46 |
} |
|
47 | ||
48 |
#' @noRd |
|
49 |
determine_term <- function(data, term, dict) { |
|
50 | 10x |
if (is.null(term)) { |
51 | 8x |
sites <- data[[dict$site]] |
52 | 8x |
x <- as.vector(table(sites)) |
53 | 8x |
if (any(x > 1)) { |
54 | 4x |
term <- "short" |
55 |
} else { |
|
56 | 4x |
term <- "long" |
57 |
} |
|
58 | 8x |
cli::cli_inform(c( |
59 | 8x |
"!" = "{.field term} assumed to be '{term}'.", |
60 | 8x |
"i" = "If this is incorrect, please specify the data's term using the {.field term} argument." |
61 |
)) |
|
62 |
} else { |
|
63 | 2x |
term <- validate_term(term) |
64 |
} |
|
65 | ||
66 | 9x |
return(term) |
67 |
} |
|
68 | ||
69 |
#' @noRd |
|
70 |
determine_resolution <- function(data, term, dict) { |
|
71 | 12x |
if (term == "long") { |
72 | 6x |
return("annual") |
73 |
} |
|
74 | ||
75 | 6x |
counts <- |
76 | 6x |
purrr::map( |
77 | 6x |
split(data, data[[dict$site]]), |
78 | 6x |
function(x) { |
79 | 90x |
difftime(x[[dict$date]], dplyr::lag(x[[dict$date]]), units = "secs") |
80 |
} |
|
81 |
) |> |
|
82 | 6x |
purrr::list_c() |> |
83 | 6x |
table() |
84 | ||
85 | 6x |
max_splits <- as.numeric(names(sort(counts)[1])) |
86 | ||
87 | 6x |
if (max_splits == 86400) { |
88 | 6x |
return("daily") |
89 | ! |
} else if (max_splits == 3600) { |
90 | ! |
return("hourly") |
91 |
} else { |
|
92 | ! |
cli::cli_abort( |
93 | ! |
"Cannot determine time resolution of data, please use {.fun mqor::mqo_params} or {.fun mqor::mqo_params_default} directly." |
94 |
) |
|
95 |
} |
|
96 |
} |
1 |
#' Function to read DELTA tool configuration files |
|
2 |
#' |
|
3 |
#' These functions read selected files from the DELTA tool's `configuration` |
|
4 |
#' directory. These are likely not overly useful for `{mqor}` as it currently |
|
5 |
#' stands, but may be useful in future if further features of the DELTA toolkit |
|
6 |
#' are incorporated into `{mqor}` or similar packages. |
|
7 |
#' |
|
8 |
#' @param file A path to an appropriate DELTA tool configuration file, e.g., |
|
9 |
#' `axis_type.dat`. |
|
10 |
#' |
|
11 |
#' @return a list |
|
12 |
#' |
|
13 |
#' @family delta reading functions |
|
14 |
#' |
|
15 |
#' @export |
|
16 |
read_delta_config <- function(file) { |
|
17 | ! |
if (grepl("aqibounds", file)) { |
18 | ! |
read_aqibounds(file) |
19 |
} else { |
|
20 | ! |
read_config(file) |
21 |
} |
|
22 |
} |
|
23 | ||
24 |
#' Read any config file |
|
25 |
#' @noRd |
|
26 |
read_config <- function(file) { |
|
27 |
# read raw data - drop anything beginning with ; or # |
|
28 | ! |
x <- readLines(file) |
29 | ! |
x <- x[!startsWith(x, ";")] |
30 | ! |
x <- x[!startsWith(x, "#")] |
31 | ||
32 |
# get first row and format as vector of headers |
|
33 | ! |
header <- x[1] |
34 | ! |
header <- gsub("[", "", header, fixed = TRUE) |
35 | ! |
header <- gsub("]", "", header, fixed = TRUE) |
36 | ! |
header <- strsplit(header, ": ")[[1]][2] |
37 | ||
38 |
# some are delimited by commas, some by ; |
|
39 | ! |
delim <- "," |
40 | ! |
if (!grepl(",", header)) { |
41 | ! |
delim <- ";" |
42 |
} |
|
43 | ||
44 |
# split headers into vector, ignoring delimters within parentheses |
|
45 | ! |
split_outside_parentheses <- function(text) { |
46 |
# Pattern: match a comma not inside parentheses |
|
47 | ! |
pattern <- paste0(delim, "(?![^()]*\\))") |
48 | ||
49 |
# Split the string using the pattern |
|
50 | ! |
result <- strsplit(text, pattern, perl = TRUE)[[1]] |
51 | ||
52 |
# Trim whitespace |
|
53 | ! |
result <- trimws(result) |
54 | ||
55 | ! |
return(result) |
56 |
} |
|
57 | ||
58 |
# tidy up names |
|
59 | ! |
header <- split_outside_parentheses(header) |
60 | ! |
header <- header[header != "" & header != "\""] |
61 | ! |
header <- gsub("\"", "", header, fixed = TRUE) |
62 | ||
63 |
# read in data |
|
64 | ! |
if (length(x[-1]) != 0) { |
65 | ! |
out <- utils::read.delim(text = x[-1], sep = ";", header = F) |
66 | ! |
out <- stats::setNames(out, header[1:ncol(out)]) |
67 | ||
68 | ! |
for (i in names(out)) { |
69 | ! |
if (all(is.na(out[[i]]))) { |
70 | ! |
out[[i]] <- NULL |
71 |
} |
|
72 |
} |
|
73 |
} else { |
|
74 |
# if empty, return empty dataframe |
|
75 | ! |
out <- data.frame() |
76 | ! |
for (i in header) { |
77 | ! |
out <- cbind(out, data.frame(character(0)) |> stats::setNames(i)) |
78 |
} |
|
79 |
} |
|
80 | ||
81 | ! |
out$file <- file |
82 | ||
83 | ! |
return(tibble::tibble(out)) |
84 |
} |
|
85 | ||
86 |
#' Special handling for the AQIBounds |
|
87 |
#' @noRd |
|
88 |
read_aqibounds <- function(file) { |
|
89 | ! |
x <- readLines(file) |
90 | ! |
x <- x[!startsWith(x, ";") & !startsWith(x, "#")] |
91 | ! |
x <- x[x != ""] |
92 | ! |
aqi_tables <- |
93 | ! |
tibble::enframe(x, name = NULL) |> |
94 | ! |
dplyr::mutate(id = ifelse(grepl("ID", .data$value), .data$value, NA)) |> |
95 | ! |
tidyr::fill("id") |> |
96 | ! |
dplyr::distinct() |> |
97 | ! |
split(~id, drop = TRUE) |
98 | ||
99 | ! |
read_aqi_tbl <- function(aqi) { |
100 | ! |
aqi <- aqi$value |
101 | ! |
aqi <- aqi[aqi != "END"] |
102 | ! |
name <- trimws(strsplit(aqi[1], "=")[[1]][2]) |
103 | ! |
ncat <- trimws(strsplit(aqi[2], "=")[[1]][2]) |
104 | ! |
cat_names <- strsplit(trimws(strsplit(aqi[8], "=")[[1]][2]), ",")[[1]] |> |
105 | ! |
trimws() |
106 | ||
107 | ! |
n <- which(startsWith(aqi, "Cat_Names")) - 1 |
108 | ||
109 | ! |
lims <- |
110 | ! |
strsplit(trimws(aqi[3:n]), "=") |> |
111 | ! |
purrr::map(trimws) |
112 | ||
113 | ! |
purrr::map( |
114 | ! |
lims, |
115 | ! |
function(x) { |
116 | ! |
data.frame( |
117 | ! |
value = strsplit(x[[2]], ", ")[[1]] |> trimws() |> as.numeric() |
118 |
) |> |
|
119 | ! |
stats::setNames(x[[1]]) |
120 |
} |
|
121 |
) |> |
|
122 | ! |
dplyr::bind_cols() |> |
123 | ! |
dplyr::slice_tail(n = -1) |> |
124 | ! |
dplyr::mutate( |
125 | ! |
aqi = name, |
126 | ! |
category = rev(cat_names), |
127 | ! |
.before = dplyr::everything() |
128 |
) |
|
129 |
} |
|
130 | ||
131 | ! |
purrr::map(aqi_tables, read_aqi_tbl) |> |
132 | ! |
dplyr::bind_rows() |> |
133 | ! |
dplyr::tibble() |> |
134 | ! |
tidyr::pivot_longer( |
135 | ! |
cols = -c("aqi", "category"), |
136 | ! |
values_to = "value", |
137 | ! |
names_to = "param" |
138 |
) |
|
139 |
} |
1 |
#' Plot a bar chart visualising the Modelling Quality Indicator (MQI) |
|
2 |
#' |
|
3 |
#' This function produces an ordered bar chart visualising the modelling quality |
|
4 |
#' indicator (MQI) for each `site` in the dataset. It also draws the 90th |
|
5 |
#' percentile of MQIs as a horizontal line; if this is below unity (`1`) the |
|
6 |
#' Modelling Quality Objective has been met. Unlike [plot_comparison_bars()], |
|
7 |
#' both long-term and short-term MQIs/MQOs are visualised identically - although |
|
8 |
#' input `params` and calculated `mqi` and `mqo` values will differ. |
|
9 |
#' |
|
10 |
#' @inheritParams plot_comparison_bars |
|
11 |
#' |
|
12 |
#' @param color_bars,color_outline The colours to use to fill the bars, and for |
|
13 |
#' the outlines/horizontal lines. Can be expressed as hex codes, or any |
|
14 |
#' colours listed in [colors()]. |
|
15 |
#' |
|
16 |
#' @family plotting functions |
|
17 |
#' |
|
18 |
#' @author Jack Davison |
|
19 |
#' |
|
20 |
#' @examples |
|
21 |
#' plot_mqi_bars( |
|
22 |
#' summarise_mqo_stats(demo_longterm, pollutant = "PM10") |
|
23 |
#' ) |
|
24 |
#' |
|
25 |
#' plot_mqi_bars( |
|
26 |
#' summarise_mqo_stats(demo_shortterm, pollutant = "PM10") |
|
27 |
#' ) |
|
28 |
#' |
|
29 |
#' @export |
|
30 |
plot_mqi_bars <- function( |
|
31 |
stats, |
|
32 |
color_bars = "grey85", |
|
33 |
color_outline = "black", |
|
34 |
interactive = FALSE |
|
35 |
) { |
|
36 | 4x |
validate_mqor_stats(stats) |
37 | 4x |
term <- stats$inputs$term |
38 | ||
39 | 4x |
stats$by_site$site <- forcats::fct_reorder( |
40 | 4x |
stats$by_site$site, |
41 | 4x |
stats$by_site$mqi |
42 |
) |
|
43 | ||
44 | 4x |
plotdata <- stats$by_site |
45 | ||
46 | 4x |
mqi90 <- stats$summary$mqi_90 |
47 | ||
48 | 4x |
if (interactive) { |
49 | 2x |
rlang::check_installed("plotly") |
50 | ||
51 | 2x |
plt <- |
52 | 2x |
plotly::plot_ly() |> |
53 | 2x |
plotly::add_trace( |
54 | 2x |
x = plotdata$site, |
55 | 2x |
y = plotdata$mqi, |
56 | 2x |
type = "bar", |
57 | 2x |
color = I(color_bars), |
58 | 2x |
marker = list(line = list(color = color_outline, width = 1)) |
59 |
) |> |
|
60 | 2x |
plotly::add_annotations( |
61 | 2x |
y = mqi90 + 0.02, |
62 | 2x |
xref = "paper", |
63 | 2x |
x = 0.025, |
64 | 2x |
text = as.character(round(mqi90, 2)), |
65 | 2x |
showarrow = FALSE |
66 |
) |> |
|
67 | 2x |
plotly::add_annotations( |
68 | 2x |
y = 1.02, |
69 | 2x |
xref = "paper", |
70 | 2x |
x = 0.025, |
71 | 2x |
text = "1.00", |
72 | 2x |
showarrow = FALSE |
73 |
) |> |
|
74 | 2x |
plotly::layout( |
75 | 2x |
shapes = list( |
76 | 2x |
hline(1, color = color_outline, dash = "solid"), |
77 | 2x |
hline(mqi90, color = color_outline, dash = "dot") |
78 |
), |
|
79 | 2x |
xaxis = list( |
80 | 2x |
title = "Sampling Points" |
81 |
), |
|
82 | 2x |
yaxis = list( |
83 | 2x |
title = paste0("MQI<sub>", term, "</sub>"), |
84 | 2x |
fixedrange = TRUE |
85 |
) |
|
86 |
) |
|
87 |
} else { |
|
88 | 2x |
plt <- |
89 | 2x |
ggplot2::ggplot( |
90 | 2x |
plotdata, |
91 | 2x |
ggplot2::aes(x = .data$site, y = .data$mqi) |
92 |
) + |
|
93 | 2x |
ggplot2::geom_col( |
94 | 2x |
width = (2 / 3), |
95 | 2x |
fill = color_bars, |
96 | 2x |
color = color_outline |
97 |
) + |
|
98 | 2x |
ggplot2::geom_hline(yintercept = 1L, lty = 1, color = color_outline) + |
99 | 2x |
ggplot2::geom_hline(yintercept = mqi90, lty = 2, color = color_outline) + |
100 | 2x |
ggplot2::theme_bw(14) + |
101 | 2x |
ggplot2::scale_y_continuous( |
102 | 2x |
expand = ggplot2::expansion(c(0, .1)), |
103 | 2x |
sec.axis = ggplot2::sec_axis(~., breaks = c(1, round(mqi90, 2))) |
104 |
) + |
|
105 | 2x |
ggplot2::labs( |
106 | 2x |
x = "Sampling Points", |
107 | 2x |
y = if (term == "short") { |
108 | 1x |
expression(MQI[short]) |
109 | 2x |
} else if (term == "long") { |
110 | 1x |
expression(MQI[long]) |
111 |
} |
|
112 |
) |
|
113 |
} |
|
114 | ||
115 | 4x |
return(plt) |
116 |
} |
|
117 | ||
118 |
#' Helper |
|
119 |
#' @noRd |
|
120 |
hline <- function(y = 0, color = "black", dash = "dash") { |
|
121 | 4x |
list( |
122 | 4x |
type = "line", |
123 | 4x |
x0 = 0, |
124 | 4x |
x1 = 1, |
125 | 4x |
xref = "paper", |
126 | 4x |
y0 = y, |
127 | 4x |
y1 = y, |
128 | 4x |
line = list(color = color, dash = dash) |
129 |
) |
|
130 |
} |
1 |
#' @method print mqor_stats |
|
2 |
#' @export |
|
3 |
print.mqor_stats <- function(x, ...) { |
|
4 | ! |
cli::cli_h1("{.pkg mqor} Statistics Set") |
5 | ! |
if (x$summary$mqo) { |
6 | ! |
cli::cli_alert_success("{.field MQO} Passed") |
7 |
} else { |
|
8 | ! |
cli::cli_alert_danger("{.field MQO} Failed") |
9 |
} |
|
10 | ! |
cli::cli_dl( |
11 | ! |
c( |
12 | ! |
Pollutant = x$inputs$pollutant, |
13 | ! |
Term = x$inputs$term |
14 |
) |
|
15 |
) |
|
16 | ||
17 | ! |
cli::cli_h2("Objects") |
18 | ! |
cli::cli_ul(id = "mqorlist") |
19 | ! |
for (i in names(x)) { |
20 | ! |
if (is.data.frame(x[[i]])) { |
21 | ! |
cli::cli_li("x{.field ${i}} ({nrow(x[[i]])} row{?s})") |
22 |
} else { |
|
23 | ! |
cli::cli_li("x{.field ${i}}") |
24 |
} |
|
25 |
} |
|
26 | ! |
cli::cli_end(id = "mqorlist") |
27 |
} |
|
28 | ||
29 |
#' @method summary mqor_stats |
|
30 |
#' @export |
|
31 |
summary.mqor_stats <- function(object, ...) { |
|
32 | ! |
object$summary |
33 |
} |
|
34 | ||
35 |
#' @method plot mqor_stats |
|
36 |
#' @export |
|
37 |
plot.mqor_stats <- function(x, ...) { |
|
38 | ! |
if (x$inputs$term == "long") { |
39 | ! |
plot_mqi_scatter(stats_longterm = x, ...) |
40 | ! |
} else if (x$inputs$term == "short") { |
41 | ! |
plot_mqi_scatter(stats_shortterm = x, ...) |
42 |
} |
|
43 |
} |
|
44 | ||
45 |
#' @noRd |
|
46 |
is.mqor_stats <- function(x) { |
|
47 | 26x |
inherits(x, "mqor_stats") |
48 |
} |
|
49 | ||
50 |
#' @noRd |
|
51 |
validate_mqor_stats <- function(stats) { |
|
52 | 32x |
if (!is.null(stats) && !is.mqor_stats(stats)) { |
53 | ! |
cli::cli_abort( |
54 | ! |
c( |
55 | ! |
"!" = "This function expects an {.pkg mqor} statistics set generated by {.fun mqor::summarise_mqo_stats}.", |
56 | ! |
"i" = "You have provided {.type {stats}}." |
57 |
), |
|
58 | ! |
call = NULL |
59 |
) |
|
60 |
} |
|
61 |
} |
1 |
#' Plot a 'report' summarising indicators for both short- and long-term model |
|
2 |
#' quality objectives |
|
3 |
#' |
|
4 |
#' This function produces a series of one-dimensional scatter plots reporting |
|
5 |
#' all of the spatial and temporal indicators, as well as the MQIs, contained |
|
6 |
#' within short- and long-term statistics objects. In non-interactive plots, a |
|
7 |
#' dotted vertical line indicates the 90th percentile (for MQIs & TIs) or |
|
8 |
#' maximum (for SIs) values for each statistic. |
|
9 |
#' |
|
10 |
#' @inheritParams plot_mqi_scatter |
|
11 |
#' |
|
12 |
#' @param color_target The colour to use for highlighting the target range for |
|
13 |
#' each statistic (always `0` to `1`). This colour will recieve an alpha value |
|
14 |
#' of `0.15` to ensure the scatter markers are still visible against it. |
|
15 |
#' |
|
16 |
#' @param censor The maximum value to show in the 'report'. Any values above |
|
17 |
#' this will be reduced to this value. The default, `2`, is twice the size of |
|
18 |
#' the objective of `1` and is appropriate for most use cases. |
|
19 |
#' |
|
20 |
#' @family plotting functions |
|
21 |
#' |
|
22 |
#' @author Jack Davison |
|
23 |
#' |
|
24 |
#' @returns a [patchwork][patchwork::patchwork-package] assemblage |
|
25 |
#' |
|
26 |
#' @examples |
|
27 |
#' long <- summarise_mqo_stats(demo_longterm, pollutant = "PM10") |
|
28 |
#' |
|
29 |
#' short <- summarise_mqo_stats(demo_shortterm, pollutant = "PM10") |
|
30 |
#' |
|
31 |
#' plot_mqi_report(short, long) |
|
32 |
#' |
|
33 |
#' @export |
|
34 |
plot_mqi_report <- function( |
|
35 |
stats_shortterm = NULL, |
|
36 |
stats_longterm = NULL, |
|
37 |
color_fixed = "#4269D0", |
|
38 |
color_indicative = "#EFB118", |
|
39 |
color_outline = "black", |
|
40 |
color_target = "green4", |
|
41 |
censor = 2, |
|
42 |
interactive = FALSE |
|
43 |
) { |
|
44 | 2x |
validate_mqor_stats(stats_shortterm) |
45 | 2x |
validate_mqor_stats(stats_longterm) |
46 | ||
47 | 2x |
if (!interactive) { |
48 | 1x |
plot_pi_bar <- function(data, x, xintercept, ylab) { |
49 | 9x |
data[[x]][data[[x]] > censor] <- censor |
50 | ||
51 | 9x |
ggplot2::ggplot(data, ggplot2::aes(x = abs(.data[[x]]), y = "")) + |
52 | 9x |
ggplot2::annotate( |
53 | 9x |
geom = "rect", |
54 | 9x |
ymax = Inf, |
55 | 9x |
ymin = -Inf, |
56 | 9x |
xmin = -Inf, |
57 | 9x |
xmax = 1, |
58 | 9x |
alpha = 0.15, |
59 | 9x |
fill = color_target |
60 |
) + |
|
61 | 9x |
ggplot2::geom_vline( |
62 | 9x |
xintercept = min(c(censor, xintercept)), |
63 | 9x |
lty = 3, |
64 | 9x |
color = color_outline |
65 |
) + |
|
66 | 9x |
ggplot2::geom_vline( |
67 | 9x |
xintercept = censor, |
68 | 9x |
lty = 2, |
69 | 9x |
color = color_outline |
70 |
) + |
|
71 | 9x |
ggplot2::geom_jitter(width = 0, ggplot2::aes(color = .data$type)) + |
72 | 9x |
ggplot2::scale_x_continuous( |
73 | 9x |
limits = c(0, censor), |
74 | 9x |
sec.axis = ggplot2::sec_axis( |
75 |
~., |
|
76 | 9x |
breaks = xintercept, |
77 | 9x |
labels = signif(xintercept, 2) |
78 |
) |
|
79 |
) + |
|
80 | 9x |
ggplot2::scale_color_manual( |
81 | 9x |
values = c( |
82 | 9x |
"fixed" = color_fixed, |
83 | 9x |
"indicative" = color_indicative |
84 |
), |
|
85 | 9x |
aesthetics = c("fill", "colour") |
86 |
) + |
|
87 | 9x |
ggplot2::labs(x = NULL, y = ylab, color = NULL) + |
88 | 9x |
ggplot2::theme_bw(14) + |
89 | 9x |
ggplot2::theme( |
90 | 9x |
axis.title.y = ggplot2::element_text( |
91 | 9x |
angle = 0, |
92 | 9x |
vjust = 0.5, |
93 | 9x |
hjust = 1 |
94 |
) |
|
95 |
) + |
|
96 | 9x |
ggplot2::coord_cartesian(clip = "off") |
97 |
} |
|
98 | ||
99 | 1x |
if (!is.null(stats_shortterm)) { |
100 | 1x |
plots_short <- list( |
101 | 1x |
plot_pi_bar( |
102 | 1x |
stats_shortterm$by_site, |
103 | 1x |
"mqi", |
104 | 1x |
stats_shortterm$summary$mqi_90, |
105 | 1x |
bquote(MQI[short]) |
106 |
) + |
|
107 | 1x |
ggplot2::labs(title = "Performance in Time"), |
108 | 1x |
plot_pi_bar( |
109 | 1x |
stats_shortterm$by_site, |
110 | 1x |
"ti_bias", |
111 | 1x |
stats_shortterm$summary$ti_bias_90, |
112 | 1x |
bquote(TI[B]) |
113 |
), |
|
114 | 1x |
plot_pi_bar( |
115 | 1x |
stats_shortterm$by_site, |
116 | 1x |
"ti_cor", |
117 | 1x |
stats_shortterm$summary$ti_cor_90, |
118 | 1x |
bquote(TI[R]) |
119 |
), |
|
120 | 1x |
plot_pi_bar( |
121 | 1x |
stats_shortterm$by_site, |
122 | 1x |
"ti_sd", |
123 | 1x |
stats_shortterm$summary$ti_sd_90, |
124 | 1x |
bquote(TI[sigma]) |
125 |
) |
|
126 |
) |
|
127 |
} else { |
|
128 | ! |
plots_short <- NULL |
129 |
} |
|
130 | ||
131 | 1x |
if (!is.null(stats_longterm)) { |
132 | 1x |
plots_long <- list( |
133 | 1x |
plot_pi_bar( |
134 | 1x |
stats_longterm$by_site, |
135 | 1x |
"mqi", |
136 | 1x |
stats_longterm$summary$mqi_90, |
137 | 1x |
bquote(MQI[long]) |
138 |
) + |
|
139 | 1x |
ggplot2::labs(title = "Performance in Space"), |
140 | 1x |
plot_pi_bar( |
141 | 1x |
stats_longterm$by_type, |
142 | 1x |
"si_rmse", |
143 | 1x |
max(stats_longterm$by_type$si_rmse), |
144 | 1x |
bquote(SI[RMSE]) |
145 |
), |
|
146 | 1x |
plot_pi_bar( |
147 | 1x |
stats_longterm$by_type, |
148 | 1x |
"si_bias", |
149 | 1x |
max(stats_longterm$by_type$si_bias), |
150 | 1x |
bquote(SI[B]) |
151 |
), |
|
152 | 1x |
plot_pi_bar( |
153 | 1x |
stats_longterm$by_type, |
154 | 1x |
"si_cor", |
155 | 1x |
max(stats_longterm$by_type$si_cor), |
156 | 1x |
bquote(SI[R]) |
157 |
), |
|
158 | 1x |
plot_pi_bar( |
159 | 1x |
stats_longterm$by_type, |
160 | 1x |
"si_sd", |
161 | 1x |
max(stats_longterm$by_type$si_sd), |
162 | 1x |
bquote(SI[sigma]) |
163 |
) |
|
164 |
) |
|
165 |
} else { |
|
166 | ! |
plots_long <- NULL |
167 |
} |
|
168 | ||
169 | 1x |
plts <- list() |
170 | 1x |
if (!is.null(plots_short)) { |
171 | 1x |
plts <- append(plts, plots_short) |
172 |
} |
|
173 | 1x |
if (!is.null(plots_short) && !is.null(plots_long)) { |
174 | 1x |
plts <- append(plts, list(patchwork::plot_spacer())) |
175 |
} |
|
176 | 1x |
if (!is.null(plots_long)) { |
177 | 1x |
plts <- append(plts, plots_long) |
178 |
} |
|
179 | ||
180 | 1x |
plt <- |
181 | 1x |
patchwork::wrap_plots( |
182 | 1x |
ncol = 1, |
183 | 1x |
plts |
184 |
) + |
|
185 | 1x |
patchwork::plot_layout(guides = "collect", axes = "collect") & |
186 | 1x |
ggplot2::theme( |
187 | 1x |
legend.position = ifelse( |
188 | 1x |
dplyr::n_distinct(stats_shortterm$by_site$type) > 1, |
189 | 1x |
"bottom", |
190 | 1x |
"none" |
191 |
), |
|
192 | 1x |
legend.justification = "left" |
193 |
) |
|
194 | ||
195 | 1x |
return(plt) |
196 |
} |
|
197 | ||
198 | 1x |
if (interactive) { |
199 | 1x |
add_report_plotly <- function(plt, data, showlegend = FALSE) { |
200 | 2x |
plt |> |
201 | 2x |
plotly::add_bars( |
202 | 2x |
inherit = FALSE, |
203 | 2x |
y = dplyr::distinct(data, .data$name)$name, |
204 | 2x |
x = rep(2, dplyr::n_distinct(data$name)), |
205 | 2x |
marker = list(line = list(color = color_outline, width = 1)), |
206 | 2x |
color = I(grDevices::rgb(0, 0, 0, 0)), |
207 | 2x |
hoverinfo = "skip", |
208 | 2x |
showlegend = FALSE, |
209 | 2x |
colors = c( |
210 | 2x |
"fixed" = color_fixed, |
211 | 2x |
"indicative" = color_indicative |
212 |
) |
|
213 |
) |> |
|
214 | 2x |
plotly::add_bars( |
215 | 2x |
inherit = FALSE, |
216 | 2x |
y = dplyr::distinct(data, .data$name)$name, |
217 | 2x |
x = rep(1, dplyr::n_distinct(data$name)), |
218 | 2x |
color = I(col), |
219 | 2x |
hoverinfo = "skip", |
220 | 2x |
showlegend = FALSE, |
221 | 2x |
colors = c( |
222 | 2x |
"fixed" = color_fixed, |
223 | 2x |
"indicative" = color_indicative |
224 |
) |
|
225 |
) |> |
|
226 | 2x |
plotly::add_markers( |
227 | 2x |
colors = c( |
228 | 2x |
"fixed" = color_fixed, |
229 | 2x |
"indicative" = color_indicative |
230 |
), |
|
231 | 2x |
showlegend = showlegend |
232 |
) |> |
|
233 | 2x |
plotly::layout( |
234 | 2x |
barmode = "overlay", |
235 | 2x |
xaxis = list( |
236 | 2x |
range = c(-0.1, censor), |
237 | 2x |
mirror = TRUE, |
238 | 2x |
title = "" |
239 |
), |
|
240 | 2x |
yaxis = list( |
241 | 2x |
title = "", |
242 | 2x |
fixedrange = TRUE |
243 |
), |
|
244 | 2x |
legend = list( |
245 | 2x |
orientation = "h", |
246 | 2x |
y = 1.1 |
247 |
), |
|
248 | 2x |
showlegend = TRUE |
249 |
) |
|
250 |
} |
|
251 | ||
252 | 1x |
col <- t(grDevices::col2rgb(color_target)) |
253 | 1x |
col <- grDevices::rgb( |
254 | 1x |
red = col[1, "red"], |
255 | 1x |
green = col[1, "green"], |
256 | 1x |
blue = col[1, "blue"], |
257 | 1x |
alpha = 255 * 0.15, |
258 | 1x |
maxColorValue = 255 |
259 |
) |
|
260 | ||
261 | 1x |
if (!is.null(stats_shortterm)) { |
262 | 1x |
shortdat <- |
263 | 1x |
stats_shortterm$by_site |> |
264 | 1x |
dplyr::select(c("site", "type", "mqi", "ti_bias", "ti_cor", "ti_sd")) |> |
265 | 1x |
tidyr::pivot_longer(c("mqi", "ti_bias", "ti_cor", "ti_sd")) |> |
266 | 1x |
dplyr::mutate( |
267 | 1x |
name = factor( |
268 | 1x |
.data$name, |
269 | 1x |
rev(c("mqi", "ti_bias", "ti_cor", "ti_sd")), |
270 | 1x |
rev(c( |
271 | 1x |
"MQI<sub>short</sub>", |
272 | 1x |
"TI<sub>B</sub>", |
273 | 1x |
"TI<sub>R</sub>", |
274 | 1x |
"TI<sub>SD</sub>" |
275 |
)) |
|
276 |
), |
|
277 | 1x |
value = abs(.data$value), |
278 | 1x |
value = ifelse(.data$value > censor, censor, .data$value) |
279 |
) |
|
280 | ||
281 | 1x |
plt_short <- |
282 | 1x |
shortdat |> |
283 | 1x |
plotly::plot_ly( |
284 | 1x |
y = ~name, |
285 | 1x |
x = ~ abs(value), |
286 | 1x |
color = ~type, |
287 | 1x |
colors = c( |
288 | 1x |
"fixed" = color_fixed, |
289 | 1x |
"indicative" = color_indicative |
290 |
), |
|
291 | 1x |
legendgroup = ~type, |
292 | 1x |
text = ~site |
293 |
) |> |
|
294 | 1x |
add_report_plotly(shortdat, showlegend = TRUE) |
295 |
} else { |
|
296 | ! |
plt_short <- NULL |
297 |
} |
|
298 | ||
299 | 1x |
if (!is.null(stats_longterm)) { |
300 | 1x |
longdat <- |
301 | 1x |
dplyr::bind_rows( |
302 | 1x |
dplyr::select( |
303 | 1x |
stats_longterm$by_site, |
304 | 1x |
c("site", "type", "mqi") |
305 |
) |> |
|
306 | 1x |
tidyr::pivot_longer("mqi"), |
307 | 1x |
tidyr::pivot_longer( |
308 | 1x |
data = stats_longterm$by_type, |
309 | 1x |
-"type" |
310 |
) |
|
311 |
) |> |
|
312 | 1x |
dplyr::mutate( |
313 | 1x |
name = factor( |
314 | 1x |
.data$name, |
315 | 1x |
rev(c("mqi", "si_rmse", "si_bias", "si_cor", "si_sd")), |
316 | 1x |
rev(c( |
317 | 1x |
"MQI<sub>long</sub>", |
318 | 1x |
"SI<sub>RMSE</sub>", |
319 | 1x |
"SI<sub>B</sub>", |
320 | 1x |
"SI<sub>R</sub>", |
321 | 1x |
"SI<sub>SD</sub>" |
322 |
)) |
|
323 |
), |
|
324 | 1x |
value = abs(.data$value), |
325 | 1x |
value = ifelse(.data$value > censor, censor, .data$value) |
326 |
) |
|
327 | ||
328 | 1x |
plt_long <- |
329 | 1x |
longdat |> |
330 | 1x |
plotly::plot_ly( |
331 | 1x |
y = ~name, |
332 | 1x |
x = ~ abs(value), |
333 | 1x |
color = ~type, |
334 | 1x |
text = ~site, |
335 | 1x |
legendgroup = ~type, |
336 | 1x |
showlegend = is.null(stats_shortterm) |
337 |
) |> |
|
338 | 1x |
add_report_plotly(longdat, showlegend = is.null(stats_shortterm)) |
339 |
} else { |
|
340 | ! |
plt_long <- NULL |
341 |
} |
|
342 | ||
343 | 1x |
if (!is.null(plt_short) && !is.null(plt_long)) { |
344 | 1x |
plt <- |
345 | 1x |
plotly::subplot( |
346 | 1x |
shareX = TRUE, |
347 | 1x |
titleY = TRUE, |
348 | 1x |
nrows = 2, |
349 | 1x |
plt_short, |
350 | 1x |
plt_long, |
351 | 1x |
heights = c(4 / 9, 5 / 9) |
352 |
) |
|
353 | ! |
} else if (!is.null(plt_short)) { |
354 | ! |
plt <- plt_short |
355 | ! |
} else if (!is.null(plt_long)) { |
356 | ! |
plt <- plt_long |
357 |
} |
|
358 | ||
359 | 1x |
return(plt) |
360 |
} |
|
361 |
} |
1 |
#' Plot a bar chart comparing observed (measured) and modelled concentrations |
|
2 |
#' |
|
3 |
#' This function produces a plot comparing modelled and measured concentrations. |
|
4 |
#' For long-term data, the provided annual averaged measured data is visualised |
|
5 |
#' alongside the performance acceptability range of the observations. For |
|
6 |
#' short-term data, the RMSE and RMSU*0 are visualised. |
|
7 |
#' |
|
8 |
#' @param stats The output of [summarise_mqo_stats()]. All relevant information |
|
9 |
#' (e.g., `term`, `params_fixed`, etc.) will be passed to this function. |
|
10 |
#' |
|
11 |
#' @param interactive If `FALSE`, the default, a static `ggplot2` graphic will |
|
12 |
#' be returned which can be saved as a PNG, SVG, or other similar format. If |
|
13 |
#' `TRUE`, a dynamic HTML widget will be returned created by `plotly`. |
|
14 |
#' |
|
15 |
#' @param color_obs,color_mod,color_outline The colours to use to fill the |
|
16 |
#' 'observation' bars, 'modelled' bars, and for the outlines/error bars. Can |
|
17 |
#' be expressed as hex codes, or any colours listed in [colors()]. |
|
18 |
#' |
|
19 |
#' @family plotting functions |
|
20 |
#' |
|
21 |
#' @author Jack Davison |
|
22 |
#' |
|
23 |
#' @examples |
|
24 |
#' plot_comparison_bars( |
|
25 |
#' summarise_mqo_stats(demo_longterm, pollutant = "PM10") |
|
26 |
#' ) |
|
27 |
#' |
|
28 |
#' plot_comparison_bars( |
|
29 |
#' summarise_mqo_stats(demo_shortterm, pollutant = "PM10") |
|
30 |
#' ) |
|
31 |
#' |
|
32 |
#' @export |
|
33 |
plot_comparison_bars <- function( |
|
34 |
stats, |
|
35 |
color_obs = "grey85", |
|
36 |
color_mod = "black", |
|
37 |
color_outline = "black", |
|
38 |
interactive = FALSE |
|
39 |
) { |
|
40 | 4x |
validate_mqor_stats(stats) |
41 | 4x |
if (interactive) { |
42 | 2x |
rlang::check_installed("plotly") |
43 |
} |
|
44 | 4x |
term <- stats$inputs$term |
45 | ||
46 | 4x |
if (term == "long") { |
47 | 2x |
plotdata <- |
48 | 2x |
stats$by_site |> |
49 | 2x |
tidyr::pivot_longer(cols = c("mod", "obs"), names_to = "stat") |> |
50 | 2x |
dplyr::mutate( |
51 | 2x |
ar_low = dplyr::if_else(.data$stat == "mod", NA, .data$ar_low), |
52 | 2x |
ar_up = dplyr::if_else(.data$stat == "mod", NA, .data$ar_up), |
53 | 2x |
error = dplyr::if_else( |
54 | 2x |
.data$stat == "mod", |
55 | 2x |
0, |
56 | 2x |
.data$ar_up - .data$value |
57 |
) |
|
58 |
) |> |
|
59 | 2x |
dplyr::mutate( |
60 | 2x |
stat = factor( |
61 | 2x |
.data$stat, |
62 | 2x |
c("obs", "mod"), |
63 | 2x |
c("Observations", "Modelled Values") |
64 |
) |
|
65 |
) |
|
66 | ||
67 | 2x |
if (interactive) { |
68 |
# need to split the data, to overcome known issue in {plotly} |
|
69 |
# <https://github.com/plotly/plotly.R/issues/762> |
|
70 | 1x |
plotdata_splits <- split(plotdata, plotdata$stat) |
71 | ||
72 | 1x |
plt <- |
73 | 1x |
plotly::plot_ly() |> |
74 | 1x |
plotly::add_trace( |
75 | 1x |
type = "bar", |
76 | 1x |
x = plotdata_splits$Observations[["site"]], |
77 | 1x |
y = plotdata_splits$Observations$value, |
78 | 1x |
error_y = list( |
79 | 1x |
array = plotdata_splits$Observations$error, |
80 | 1x |
color = color_outline |
81 |
), |
|
82 | 1x |
name = "Observations", |
83 | 1x |
color = I(color_obs), |
84 | 1x |
marker = list(line = list(color = color_outline, width = 1)) |
85 |
) |> |
|
86 | 1x |
plotly::add_trace( |
87 | 1x |
type = "bar", |
88 | 1x |
x = plotdata_splits$`Modelled Values`[["site"]], |
89 | 1x |
y = plotdata_splits$`Modelled Values`$value, |
90 | 1x |
name = "Modelled Values", |
91 | 1x |
color = I(color_mod), |
92 | 1x |
marker = list(line = list(color = color_outline, width = 1)) |
93 |
) |> |
|
94 | 1x |
plotly::layout( |
95 | 1x |
xaxis = list(title = "Sampling Points"), |
96 | 1x |
yaxis = list( |
97 | 1x |
title = "Concentrations (μg m<sup>-3</sup>)", |
98 | 1x |
fixedrange = TRUE |
99 |
), |
|
100 | 1x |
legend = list(orientation = "h", y = 1.1), |
101 | 1x |
showlegend = TRUE |
102 |
) |
|
103 |
} else { |
|
104 | 1x |
plt <- |
105 | 1x |
ggplot2::ggplot( |
106 | 1x |
plotdata, |
107 | 1x |
ggplot2::aes(x = .data[["site"]], y = .data$value) |
108 |
) + |
|
109 | 1x |
ggplot2::geom_col( |
110 | 1x |
ggplot2::aes(fill = .data$stat), |
111 | 1x |
position = ggplot2::position_dodge2(width = 2 / 3), |
112 | 1x |
width = 2 / 3, |
113 | 1x |
color = color_outline |
114 |
) + |
|
115 | 1x |
ggplot2::geom_linerange( |
116 | 1x |
ggplot2::aes( |
117 | 1x |
ymax = .data$ar_up, |
118 | 1x |
ymin = .data$ar_low, |
119 | 1x |
group = .data$stat |
120 |
), |
|
121 | 1x |
na.rm = TRUE, |
122 | 1x |
position = ggplot2::position_dodge2(width = 2 / 3) |
123 |
) + |
|
124 | 1x |
ggplot2::theme_bw(14) + |
125 | 1x |
ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) + |
126 | 1x |
ggplot2::scale_fill_manual(values = c(color_obs, color_mod)) + |
127 | 1x |
ggplot2::theme(legend.position = "top", legend.justification = "left") + |
128 | 1x |
ggplot2::labs( |
129 | 1x |
x = "Sampling Points", |
130 | 1x |
y = format_text("Concentrations (ug/m3)"), |
131 | 1x |
fill = NULL |
132 |
) |
|
133 |
} |
|
134 |
} |
|
135 | ||
136 | 4x |
if (term == "short") { |
137 | 2x |
plotdata <- |
138 | 2x |
stats$by_site |> |
139 | 2x |
dplyr::select(dplyr::all_of(c("site", "rmse", "rmsu_star"))) |> |
140 | 2x |
tidyr::pivot_longer( |
141 | 2x |
cols = -"site", |
142 | 2x |
names_to = "stat" |
143 |
) |> |
|
144 | 2x |
dplyr::mutate( |
145 | 2x |
stat = factor(.data$stat, c("rmse", "rmsu_star")) |
146 |
) |
|
147 | ||
148 | 2x |
if (interactive) { |
149 | 1x |
plotdata_splits <- split(plotdata, plotdata$stat) |
150 | ||
151 | 1x |
plt <- |
152 | 1x |
plotly::plot_ly() |> |
153 | 1x |
plotly::add_trace( |
154 | 1x |
type = "bar", |
155 | 1x |
x = plotdata_splits$rmse[["site"]], |
156 | 1x |
y = plotdata_splits$rmse$value, |
157 | 1x |
name = "RMSE", |
158 | 1x |
color = I(color_obs), |
159 | 1x |
marker = list(line = list(color = color_outline, width = 1)) |
160 |
) |> |
|
161 | 1x |
plotly::add_trace( |
162 | 1x |
type = "bar", |
163 | 1x |
x = plotdata_splits$rmsu_star[["site"]], |
164 | 1x |
y = plotdata_splits$rmsu_star$value, |
165 | 1x |
name = "RMSU<sub>0</sub><sup>*</sup>", |
166 | 1x |
color = I(color_mod), |
167 | 1x |
marker = list(line = list(color = color_outline, width = 1)) |
168 |
) |> |
|
169 | 1x |
plotly::layout( |
170 | 1x |
xaxis = list(title = "Sampling Points"), |
171 | 1x |
yaxis = list( |
172 | 1x |
title = "Concentrations (μg m<sup>-3</sup>)", |
173 | 1x |
fixedrange = TRUE |
174 |
), |
|
175 | 1x |
legend = list(orientation = "h", y = 1.1), |
176 | 1x |
showlegend = TRUE |
177 |
) |
|
178 |
} else { |
|
179 | 1x |
plt <- |
180 | 1x |
ggplot2::ggplot( |
181 | 1x |
plotdata, |
182 | 1x |
ggplot2::aes(x = .data[["site"]], y = .data$value) |
183 |
) + |
|
184 | 1x |
ggplot2::geom_col( |
185 | 1x |
ggplot2::aes(fill = .data$stat), |
186 | 1x |
position = ggplot2::position_dodge2(width = 2 / 3), |
187 | 1x |
width = 2 / 3, |
188 | 1x |
color = color_outline |
189 |
) + |
|
190 | 1x |
ggplot2::theme_bw(14) + |
191 | 1x |
ggplot2::scale_y_continuous(expand = ggplot2::expansion(c(0, .1))) + |
192 | 1x |
ggplot2::scale_fill_manual( |
193 | 1x |
values = c(color_obs, color_mod), |
194 | 1x |
labels = c(expression(RMSE), expression(RMSU[O]^"*")) |
195 |
) + |
|
196 | 1x |
ggplot2::theme(legend.position = "top", legend.justification = "left") + |
197 | 1x |
ggplot2::labs( |
198 | 1x |
x = "Sampling Points", |
199 | 1x |
y = format_text("Concentration (ug/m3)"), |
200 | 1x |
fill = NULL |
201 |
) |
|
202 |
} |
|
203 |
} |
|
204 | ||
205 | 4x |
return(plt) |
206 |
} |
1 |
#' Write tables of MQO summary data to a file |
|
2 |
#' |
|
3 |
#' This function takes the output of [summarise_mqo_stats()] and writes it to a |
|
4 |
#' file. Either short-term or long-term data can be written, or both at once. |
|
5 |
#' |
|
6 |
#' @inheritParams plot_mqi_report |
|
7 |
#' |
|
8 |
#' @inheritParams data.table::fwrite |
|
9 |
#' |
|
10 |
#' @param digits Integer indicating the number of decimal places to be used when |
|
11 |
#' rounding numeric values. |
|
12 |
#' |
|
13 |
#' @param file Output file name. `""` indicates output to the console. |
|
14 |
#' |
|
15 |
#' @param delim The delimiter used to separate columns in the files. The default |
|
16 |
#' is `";"`. |
|
17 |
#' |
|
18 |
#' @author Jack Davison |
|
19 |
#' |
|
20 |
#' @export |
|
21 |
write_mqo_stats <- function( |
|
22 |
stats_shortterm = NULL, |
|
23 |
stats_longterm = NULL, |
|
24 |
file = "", |
|
25 |
digits = 2, |
|
26 |
delim = ";" |
|
27 |
) { |
|
28 | 3x |
validate_mqor_stats(stats_longterm) |
29 | 3x |
validate_mqor_stats(stats_shortterm) |
30 | ||
31 | 3x |
fmt <- function(x) { |
32 | 14x |
stringr::str_replace_all(x, "_", " ") |> |
33 | 14x |
stringr::str_to_title() |
34 |
} |
|
35 | ||
36 | 3x |
write_empty_line <- function() { |
37 | 18x |
data.table::fwrite(as.list(""), file = file, append = TRUE) |
38 |
} |
|
39 | ||
40 | 3x |
if (file.exists(file)) { |
41 | ! |
unlink(file, force = TRUE) |
42 |
} |
|
43 | ||
44 | 3x |
write_stats <- function(stats) { |
45 | 4x |
title <- glue::glue( |
46 | 4x |
"{fmt(stats$inputs$term)}-term {stats$inputs$pollutant} " |
47 |
) |
|
48 | ||
49 | 4x |
data.table::fwrite(as.list(title), file = file, append = TRUE) |
50 | 4x |
write_empty_line() |
51 | ||
52 | 4x |
data.table::fwrite(as.list("Parameters"), file = file, append = TRUE) |
53 | 4x |
dplyr::bind_rows( |
54 | 4x |
"Fixed" = stats$inputs$params_fixed, |
55 | 4x |
"Indicative" = stats$inputs$params_indicative, |
56 | 4x |
.id = "type" |
57 |
) |> |
|
58 | 4x |
data.table::fwrite( |
59 | 4x |
file = file, |
60 | 4x |
append = TRUE, |
61 | 4x |
col.names = TRUE, |
62 | 4x |
row.names = FALSE, |
63 | 4x |
sep = delim, |
64 | 4x |
quote = FALSE |
65 |
) |
|
66 | ||
67 | 4x |
purrr::walk( |
68 | 4x |
names(stats)[names(stats) != "inputs"], |
69 | 4x |
function(x) { |
70 | 10x |
write_empty_line() |
71 | 10x |
data.table::fwrite(as.list(fmt(x)), file = file, append = TRUE) |
72 | 10x |
suppressWarnings( |
73 | 10x |
data.table::fwrite( |
74 | 10x |
x = stats[[x]] |> |
75 | 10x |
dplyr::mutate(dplyr::across(dplyr::where(is.numeric), \(x) { |
76 | 72x |
round(x, digits = digits) |
77 |
})), |
|
78 | 10x |
file = file, |
79 | 10x |
append = TRUE, |
80 | 10x |
col.names = TRUE, |
81 | 10x |
row.names = FALSE, |
82 | 10x |
sep = delim, |
83 | 10x |
quote = FALSE |
84 |
) |
|
85 |
) |
|
86 |
} |
|
87 |
) |
|
88 | ||
89 | 4x |
write_empty_line() |
90 |
} |
|
91 | ||
92 | 3x |
if (!is.null(stats_shortterm)) { |
93 | 2x |
write_stats(stats_shortterm) |
94 |
} |
|
95 | 3x |
if (!is.null(stats_longterm)) { |
96 | 2x |
write_stats(stats_longterm) |
97 |
} |
|
98 | ||
99 | 3x |
return(invisible(file)) |
100 |
} |
1 |
#' Quickly plot observed (measured) and modelled concentrations over time in |
|
2 |
#' short-term data |
|
3 |
#' |
|
4 |
#' This is a useful function for examining short-term data *after* it has been |
|
5 |
#' filtered or aggregated, but *before* it statistics are calculated. Unlike |
|
6 |
#' other plotting functions in `{mqor}`, this function **does not** take the |
|
7 |
#' output of [summarise_mqo_stats()]. |
|
8 |
#' |
|
9 |
#' @param data An R `data.frame` containing at least four columns; a numeric |
|
10 |
#' column of observed values, a numeric column of modelled values, a character |
|
11 |
#' or factor column of identifiers that identify the site associated with the |
|
12 |
#' concentrations, and a character or factor column of identifiers that |
|
13 |
#' identify the pollutant being measured/modelled. See [demo_shortterm] for an |
|
14 |
#' example format. |
|
15 |
#' |
|
16 |
#' @param pollutant The pollutant to plot. Should be a value contained within |
|
17 |
#' `data[[dict$pollutant]]`. |
|
18 |
#' |
|
19 |
#' @param site The station to plot.s Should be a value contained within |
|
20 |
#' `data[[dict$site]]`. |
|
21 |
#' |
|
22 |
#' @param dict See [mqo_dict()] for more information. Acts as a data dictionary |
|
23 |
#' to specify the columns in the data `{mqor}` should use. |
|
24 |
#' |
|
25 |
#' @param color_obs,color_mod The colours to use for the 'observation' and |
|
26 |
#' 'modelled' lines. Can be expressed as hex codes, or any colours listed in |
|
27 |
#' [colors()]. |
|
28 |
#' |
|
29 |
#' @inheritParams plot_comparison_bars |
|
30 |
#' |
|
31 |
#' @family plotting functions |
|
32 |
#' |
|
33 |
#' @author Jack Davison |
|
34 |
#' |
|
35 |
#' @export |
|
36 |
plot_timeseries <- function( |
|
37 |
data, |
|
38 |
pollutant, |
|
39 |
site, |
|
40 |
color_obs = "#6CC5B0", |
|
41 |
color_mod = "black", |
|
42 |
dict = mqor::mqo_dict(), |
|
43 |
interactive = FALSE |
|
44 |
) { |
|
45 | 2x |
pollutant <- rlang::arg_match( |
46 | 2x |
pollutant, |
47 | 2x |
unique(as.character(data[[dict$pollutant]])) |
48 |
) |
|
49 | ||
50 | 2x |
data <- data[ |
51 | 2x |
data[[dict$pollutant]] == pollutant & data[[dict$site]] == site, |
52 |
] |
|
53 | ||
54 | 2x |
if (nrow(data) == 0L) { |
55 | ! |
cli::cli_warn( |
56 | ! |
"No data returned; please check {.field pollutant} and {.field site}." |
57 |
) |
|
58 | ! |
return(NULL) |
59 |
} |
|
60 | ||
61 | 2x |
plotdata <- |
62 | 2x |
tidyr::pivot_longer( |
63 | 2x |
data, |
64 | 2x |
cols = dplyr::all_of(c(dict$obs, dict$mod)), |
65 | 2x |
names_to = "mqor_name", |
66 | 2x |
values_to = "mqor_value" |
67 |
) |
|
68 | ||
69 | 2x |
if (interactive) { |
70 | 1x |
plotdata$mqor_name[plotdata$mqor_name == "obs"] <- "Observations" |
71 | 1x |
plotdata$mqor_name[plotdata$mqor_name == "mod"] <- "Modelled Values" |
72 | ||
73 | 1x |
plt <- |
74 | 1x |
plotly::plot_ly() |> |
75 | 1x |
plotly::add_trace( |
76 | 1x |
x = plotdata[[dict$date]], |
77 | 1x |
y = plotdata$mqor_value, |
78 | 1x |
color = plotdata$mqor_name, |
79 | 1x |
colors = stats::setNames( |
80 | 1x |
c(color_mod, color_obs), |
81 | 1x |
c("Modelled Values", "Observations") |
82 |
), |
|
83 | 1x |
type = "scatter", |
84 | 1x |
mode = "lines+markers" |
85 |
) |> |
|
86 | 1x |
plotly::layout( |
87 | 1x |
yaxis = list(title = toupper(data[[dict$pollutant]][1])), |
88 | 1x |
xaxis = list(title = ""), |
89 | 1x |
legend = list( |
90 | 1x |
orientation = "h", |
91 | 1x |
y = 1.1 |
92 |
), |
|
93 | 1x |
hovermode = "x unified" |
94 |
) |
|
95 |
} |
|
96 | ||
97 | 2x |
if (!interactive) { |
98 | 1x |
plt <- |
99 | 1x |
plotdata |> |
100 | 1x |
ggplot2::ggplot(ggplot2::aes( |
101 | 1x |
x = .data[[dict$date]], |
102 | 1x |
y = .data$mqor_value |
103 |
)) + |
|
104 | 1x |
ggplot2::geom_line(ggplot2::aes(color = .data$mqor_name)) + |
105 | 1x |
ggplot2::geom_point(ggplot2::aes(color = .data$mqor_name)) + |
106 | 1x |
ggplot2::theme_bw(14) + |
107 | 1x |
ggplot2::theme(legend.position = "top", legend.justification = "left") + |
108 | 1x |
ggplot2::labs( |
109 | 1x |
x = NULL, |
110 | 1x |
y = toupper(data[[dict$pollutant]][1]), |
111 | 1x |
color = NULL |
112 |
) + |
|
113 | 1x |
ggplot2::scale_color_manual( |
114 | 1x |
values = stats::setNames( |
115 | 1x |
c(color_mod, color_obs), |
116 | 1x |
c(dict$mod, dict$obs) |
117 |
), |
|
118 | 1x |
breaks = c(dict$obs, dict$mod), |
119 | 1x |
labels = c("Observations", "Modelled Values") |
120 |
) |
|
121 |
} |
|
122 | ||
123 | 2x |
return(plt) |
124 |
} |
1 |
#' Create an interactive table of MQO summary data |
|
2 |
#' |
|
3 |
#' This function takes the output of [summarise_mqo_stats()] and creates an |
|
4 |
#' interactive table of the outputs. The output table is nested per object, |
|
5 |
#' |
|
6 |
#' @inheritParams plot_comparison_bars |
|
7 |
#' |
|
8 |
#' @param expanded Initialise the widget as expanded? Defaults to `TRUE`. Passed |
|
9 |
#' to [reactable::reactable()]'s `defaultExpanded` argument. |
|
10 |
#' |
|
11 |
#' @param color_pass,color_fail,color_fixed,color_indicative These colours are |
|
12 |
#' used to colour various cells in the output. Can be expressed as hex codes, |
|
13 |
#' or any colours listed in [colors()]. |
|
14 |
#' |
|
15 |
#' @returns a `reactable::reactable()` object |
|
16 |
#' |
|
17 |
#' @examples |
|
18 |
#' tabulate_mqo_stats( |
|
19 |
#' summarise_mqo_stats(demo_longterm, pollutant = "PM10") |
|
20 |
#' ) |
|
21 |
#' |
|
22 |
#' tabulate_mqo_stats( |
|
23 |
#' summarise_mqo_stats(demo_shortterm, pollutant = "PM10") |
|
24 |
#' ) |
|
25 |
#' |
|
26 |
#' @export |
|
27 |
tabulate_mqo_stats <- function( |
|
28 |
stats, |
|
29 |
expanded = TRUE, |
|
30 |
color_pass = "#136F63", |
|
31 |
color_fail = "#840032", |
|
32 |
color_fixed = "#4269D0", |
|
33 |
color_indicative = "#EFB118" |
|
34 |
) { |
|
35 | 2x |
validate_mqor_stats(stats) |
36 | 2x |
rlang::check_installed("reactable") |
37 | ||
38 | 2x |
style_lt_one <- make_style_lt_one( |
39 | 2x |
color_pass = color_pass, |
40 | 2x |
color_fail = color_fail |
41 |
) |
|
42 | ||
43 | 2x |
style_type <- make_style_type( |
44 | 2x |
color_fixed = color_fixed, |
45 | 2x |
color_indicative = color_indicative |
46 |
) |
|
47 | ||
48 | 2x |
tblname <- glue::glue( |
49 | 2x |
"MQO Statistics: {stats$inputs$pollutant}, {stats$inputs$term} term" |
50 |
) |
|
51 | ||
52 | 2x |
term <- stats$inputs$term |
53 | ||
54 | 2x |
if (term == "short") { |
55 | 1x |
tbl <- dplyr::tibble( |
56 | 1x |
x = c("Overall Summary", "Stats by Site", "Parameters") |
57 |
) |> |
|
58 | 1x |
stats::setNames(tblname) |> |
59 | 1x |
reactable::reactable( |
60 | 1x |
defaultExpanded = expanded, |
61 | 1x |
sortable = FALSE, |
62 | 1x |
searchable = FALSE, |
63 | 1x |
filterable = FALSE, |
64 | 1x |
details = function(index) { |
65 | 3x |
if (index == 1) { |
66 | 1x |
tabulate_short_summary(stats$summary, style_lt_one = style_lt_one) |
67 | 2x |
} else if (index == 2) { |
68 | 1x |
tabulate_short_by_site( |
69 | 1x |
stats$by_site, |
70 | 1x |
style_lt_one = style_lt_one, |
71 | 1x |
style_type = style_type |
72 |
) |
|
73 | 1x |
} else if (index == 3) { |
74 | 1x |
tabulate_inputs( |
75 | 1x |
stats$inputs, |
76 | 1x |
term = term |
77 |
) |
|
78 |
} |
|
79 |
} |
|
80 |
) |
|
81 |
} |
|
82 | ||
83 | 2x |
if (term == "long") { |
84 | 1x |
tbl <- |
85 | 1x |
dplyr::tibble( |
86 | 1x |
x = c("Overall Summary", "Stats by Type", "Stats by Site", "Parameters") |
87 |
) |> |
|
88 | 1x |
stats::setNames(tblname) |> |
89 | 1x |
reactable::reactable( |
90 | 1x |
defaultExpanded = expanded, |
91 | 1x |
sortable = FALSE, |
92 | 1x |
searchable = FALSE, |
93 | 1x |
filterable = FALSE, |
94 | 1x |
details = function(index) { |
95 | 4x |
if (index == 1) { |
96 | 1x |
tabulate_long_summary(stats$summary, style_lt_one = style_lt_one) |
97 | 3x |
} else if (index == 2) { |
98 | 1x |
tabulate_long_by_type( |
99 | 1x |
stats$by_type, |
100 | 1x |
style_lt_one = style_lt_one, |
101 | 1x |
style_type = style_type |
102 |
) |
|
103 | 2x |
} else if (index == 3) { |
104 | 1x |
tabulate_long_by_site( |
105 | 1x |
stats$by_site, |
106 | 1x |
style_lt_one = style_lt_one, |
107 | 1x |
style_type = style_type |
108 |
) |
|
109 | 1x |
} else if (index == 4) { |
110 | 1x |
tabulate_inputs(stats$inputs, term = term) |
111 |
} |
|
112 |
} |
|
113 |
) |
|
114 |
} |
|
115 | 2x |
return(tbl) |
116 |
} |
|
117 | ||
118 |
#' @noRd |
|
119 |
tabulate_short_summary <- function(data, style_lt_one) { |
|
120 | 1x |
reactable::reactable( |
121 | 1x |
dplyr::relocate(data, "mqo"), |
122 | 1x |
outlined = TRUE, |
123 | 1x |
defaultColDef = reactable::colDef( |
124 | 1x |
html = TRUE, |
125 | 1x |
format = reactable::colFormat(digits = 2) |
126 |
), |
|
127 | 1x |
pagination = FALSE, |
128 | 1x |
columns = list( |
129 | 1x |
mqo = reactable::colDef( |
130 | 1x |
"MQO", |
131 | 1x |
align = "right", |
132 | 1x |
cell = function(value) { |
133 | 1x |
ifelse(value, paste("Pass ", svg_check), paste("Fail ", svg_cross)) |
134 |
}, |
|
135 | 1x |
style = style_lt_one |
136 |
), |
|
137 | 1x |
mqi_90 = reactable::colDef( |
138 | 1x |
"MQI<sub>90th</sub>", |
139 | 1x |
cell = cell_lt_one, |
140 | 1x |
style = style_lt_one |
141 |
), |
|
142 | 1x |
ti_bias_90 = reactable::colDef( |
143 | 1x |
"TI<sub>B,90th</sub>", |
144 | 1x |
cell = cell_lt_one, |
145 | 1x |
style = style_lt_one |
146 |
), |
|
147 | 1x |
ti_cor_90 = reactable::colDef( |
148 | 1x |
"TI<sub>R,90th</sub>", |
149 | 1x |
cell = cell_lt_one, |
150 | 1x |
style = style_lt_one |
151 |
), |
|
152 | 1x |
ti_sd_90 = reactable::colDef( |
153 | 1x |
"TI<sub>σ,90th</sub>", |
154 | 1x |
cell = cell_lt_one, |
155 | 1x |
style = style_lt_one |
156 |
) |
|
157 |
), |
|
158 | 1x |
columnGroups = list( |
159 | 1x |
reactable::colGroup( |
160 | 1x |
name = "Temporal Indicators", |
161 | 1x |
columns = c("ti_bias_90", "ti_cor_90", "ti_sd_90") |
162 |
) |
|
163 |
) |
|
164 |
) |
|
165 |
} |
|
166 | ||
167 |
#' @noRd |
|
168 |
tabulate_short_by_site <- function(data, style_lt_one, style_type) { |
|
169 | 1x |
reactable::reactable( |
170 | 1x |
dplyr::relocate(data, dplyr::starts_with("sd_"), .after = "mean_mod"), |
171 | 1x |
outlined = TRUE, |
172 | 1x |
defaultColDef = reactable::colDef( |
173 | 1x |
html = TRUE, |
174 | 1x |
align = "right", |
175 | 1x |
vAlign = "center", |
176 | 1x |
format = reactable::colFormat(digits = 2) |
177 |
), |
|
178 | 1x |
pagination = FALSE, |
179 | 1x |
columns = list( |
180 | 1x |
site = reactable::colDef("Site", align = "right"), |
181 | 1x |
type = reactable::colDef( |
182 | 1x |
"Type", |
183 | 1x |
align = "right", |
184 | 1x |
cell = cell_type, |
185 | 1x |
style = style_type |
186 |
), |
|
187 | 1x |
mean_obs = reactable::colDef( |
188 | 1x |
'<span style="text-decoration: overline;">O</span>' |
189 |
), |
|
190 | 1x |
mean_mod = reactable::colDef( |
191 | 1x |
'<span style="text-decoration: overline;">M</span>' |
192 |
), |
|
193 | 1x |
bias = reactable::colDef("BIAS"), |
194 | 1x |
rmse = reactable::colDef("RMSE"), |
195 | 1x |
rmsu = reactable::colDef("RMSU"), |
196 | 1x |
rmsu_star = reactable::colDef("RMSU*"), |
197 | 1x |
r = reactable::colDef("COR."), |
198 | 1x |
sd_mod = reactable::colDef("σ<sub>M</sub>"), |
199 | 1x |
sd_obs = reactable::colDef("σ<sub>O</sub>"), |
200 | 1x |
crmse = reactable::colDef("CRMSE"), |
201 | 1x |
mqi = reactable::colDef("MQI", cell = cell_lt_one, style = style_lt_one), |
202 | 1x |
ti_bias = reactable::colDef( |
203 | 1x |
"TI<sub>B</sub>", |
204 | 1x |
cell = cell_lt_one, |
205 | 1x |
style = style_lt_one |
206 |
), |
|
207 | 1x |
ti_cor = reactable::colDef( |
208 | 1x |
"TI<sub>R</sub>", |
209 | 1x |
cell = cell_lt_one, |
210 | 1x |
style = style_lt_one |
211 |
), |
|
212 | 1x |
ti_sd = reactable::colDef( |
213 | 1x |
"TI<sub>σ</sub>", |
214 | 1x |
cell = cell_lt_one, |
215 | 1x |
style = style_lt_one |
216 |
), |
|
217 | 1x |
ti_crmse = reactable::colDef( |
218 | 1x |
"TI<sub>CRMSE</sub>", |
219 | 1x |
cell = cell_lt_one, |
220 | 1x |
style = style_lt_one |
221 |
) |
|
222 |
), |
|
223 | 1x |
columnGroups = list( |
224 | 1x |
reactable::colGroup( |
225 | 1x |
name = "Temporal Indicators", |
226 | 1x |
columns = c("ti_bias", "ti_cor", "ti_sd", "ti_crmse") |
227 |
) |
|
228 |
) |
|
229 |
) |
|
230 |
} |
|
231 | ||
232 |
#' @noRd |
|
233 |
tabulate_long_summary <- function(data, style_lt_one) { |
|
234 | 1x |
reactable::reactable( |
235 | 1x |
dplyr::relocate(data, "mqo", "mqi_90"), |
236 | 1x |
outlined = TRUE, |
237 | 1x |
defaultColDef = reactable::colDef( |
238 | 1x |
html = TRUE, |
239 | 1x |
format = reactable::colFormat(digits = 2) |
240 |
), |
|
241 | 1x |
pagination = FALSE, |
242 | 1x |
columns = list( |
243 | 1x |
mqo = reactable::colDef( |
244 | 1x |
"MQO", |
245 | 1x |
align = "right", |
246 | 1x |
cell = function(value) { |
247 | 1x |
ifelse(value, paste("Pass ", svg_check), paste("Fail ", svg_cross)) |
248 |
}, |
|
249 | 1x |
style = style_lt_one |
250 |
), |
|
251 | 1x |
mqi_90 = reactable::colDef( |
252 | 1x |
"MQI<sub>90th</sub>", |
253 | 1x |
cell = cell_lt_one, |
254 | 1x |
style = style_lt_one |
255 |
), |
|
256 | 1x |
bias = reactable::colDef("BIAS"), |
257 | 1x |
rmse = reactable::colDef("RMSE"), |
258 | 1x |
r = reactable::colDef("COR."), |
259 | 1x |
sd_mod = reactable::colDef("σ<sub>M</sub>"), |
260 | 1x |
sd_obs = reactable::colDef("σ<sub>O</sub>"), |
261 | 1x |
rmsu = reactable::colDef("RMSU") |
262 |
) |
|
263 |
) |
|
264 |
} |
|
265 | ||
266 |
#' @noRd |
|
267 |
tabulate_long_by_type <- function(data, style_lt_one, style_type) { |
|
268 | 1x |
reactable::reactable( |
269 | 1x |
data, |
270 | 1x |
outlined = TRUE, |
271 | 1x |
defaultColDef = reactable::colDef( |
272 | 1x |
html = TRUE, |
273 | 1x |
format = reactable::colFormat(digits = 2) |
274 |
), |
|
275 | 1x |
pagination = FALSE, |
276 | 1x |
columns = list( |
277 | 1x |
type = reactable::colDef( |
278 | 1x |
"Type", |
279 | 1x |
align = "right", |
280 | 1x |
cell = cell_type, |
281 | 1x |
style = style_type |
282 |
), |
|
283 | 1x |
si_bias = reactable::colDef( |
284 | 1x |
"SI<sub>B</sub>", |
285 | 1x |
cell = cell_lt_one, |
286 | 1x |
style = style_lt_one |
287 |
), |
|
288 | 1x |
si_cor = reactable::colDef( |
289 | 1x |
"SI<sub>R</sub>", |
290 | 1x |
cell = cell_lt_one, |
291 | 1x |
style = style_lt_one |
292 |
), |
|
293 | 1x |
si_sd = reactable::colDef( |
294 | 1x |
"SI<sub>σ</sub>", |
295 | 1x |
cell = cell_lt_one, |
296 | 1x |
style = style_lt_one |
297 |
), |
|
298 | 1x |
si_rmse = reactable::colDef( |
299 | 1x |
"SI<sub>RMSE</sub>", |
300 | 1x |
cell = cell_lt_one, |
301 | 1x |
style = style_lt_one |
302 |
) |
|
303 |
), |
|
304 | 1x |
columnGroups = list( |
305 | 1x |
reactable::colGroup( |
306 | 1x |
name = "Spatial Indicators", |
307 | 1x |
columns = c("si_bias", "si_cor", "si_sd", "si_rmse") |
308 |
) |
|
309 |
) |
|
310 |
) |
|
311 |
} |
|
312 | ||
313 |
#' @noRd |
|
314 |
tabulate_long_by_site <- function(data, style_lt_one, style_type) { |
|
315 | 1x |
reactable::reactable( |
316 | 1x |
dplyr::relocate(data, "mqi", .after = "type"), |
317 | 1x |
outlined = TRUE, |
318 | 1x |
defaultColDef = reactable::colDef( |
319 | 1x |
html = TRUE, |
320 | 1x |
format = reactable::colFormat(digits = 2) |
321 |
), |
|
322 | 1x |
pagination = FALSE, |
323 | 1x |
columns = list( |
324 | 1x |
site = reactable::colDef("Site", align = "right"), |
325 | 1x |
type = reactable::colDef( |
326 | 1x |
"Type", |
327 | 1x |
align = "right", |
328 | 1x |
cell = cell_type, |
329 | 1x |
style = style_type |
330 |
), |
|
331 | 1x |
obs = reactable::colDef( |
332 | 1x |
'<span style="text-decoration: overline;">O</span>' |
333 |
), |
|
334 | 1x |
mod = reactable::colDef( |
335 | 1x |
'<span style="text-decoration: overline;">M</span>' |
336 |
), |
|
337 | 1x |
uncer = reactable::colDef( |
338 | 1x |
"U(ō)" |
339 |
), |
|
340 | 1x |
ar_low = reactable::colDef( |
341 | 1x |
"AR<sub>low</sub>" |
342 |
), |
|
343 | 1x |
ar_up = reactable::colDef( |
344 | 1x |
"AR<sub>up</sub>" |
345 |
), |
|
346 | 1x |
mqi = reactable::colDef("MQI", cell = cell_lt_one, style = style_lt_one) |
347 |
) |
|
348 |
) |
|
349 |
} |
|
350 | ||
351 |
#' @noRd |
|
352 |
tabulate_inputs <- function(data, term) { |
|
353 | 2x |
tibble::tibble(term = data$term, pollutant = data$pollutant) |> |
354 | 2x |
dplyr::bind_cols( |
355 | 2x |
data$params_fixed |> |
356 | 2x |
dplyr::rename_with(.fn = \(x) paste0("f_", x)) |
357 |
) |> |
|
358 | 2x |
dplyr::bind_cols( |
359 | 2x |
data$params_indicative |> |
360 | 2x |
dplyr::rename_with(.fn = \(x) paste0("i_", x)) |
361 |
) |> |
|
362 | 2x |
dplyr::select(-"i_rv") |> |
363 | 2x |
reactable::reactable( |
364 | 2x |
outlined = TRUE, |
365 | 2x |
filterable = FALSE, |
366 | 2x |
searchable = FALSE, |
367 | 2x |
pagination = FALSE, |
368 | 2x |
defaultColDef = reactable::colDef( |
369 | 2x |
align = "right", |
370 | 2x |
html = TRUE |
371 |
), |
|
372 | 2x |
columns = list( |
373 | 2x |
term = reactable::colDef("Term", align = "right"), |
374 | 2x |
pollutant = reactable::colDef("Pollutant", align = "right"), |
375 | 2x |
f_rv = reactable::colDef(glue::glue("RV<sub>{term}</sub>")), |
376 | 2x |
f_u_rv = reactable::colDef(glue::glue( |
377 | 2x |
"U<sub>0r</sub>(RV<sub>{term}</sub>)" |
378 |
)), |
|
379 | 2x |
f_a = reactable::colDef(glue::glue("α<sub>{term}</sub>")), |
380 | 2x |
f_b = reactable::colDef(glue::glue("β<sub>{term}</sub>")), |
381 | 2x |
i_u_rv = reactable::colDef(glue::glue( |
382 | 2x |
"U<sub>0r</sub>(RV<sub>{term}</sub>)" |
383 |
)), |
|
384 | 2x |
i_a = reactable::colDef(glue::glue("α<sub>{term}</sub>")), |
385 | 2x |
i_b = reactable::colDef(glue::glue("β<sub>{term}</sub>")) |
386 |
), |
|
387 | 2x |
columnGroups = list( |
388 | 2x |
reactable::colGroup( |
389 | 2x |
"Fixed", |
390 | 2x |
align = "right", |
391 | 2x |
columns = c("f_u_rv", "f_a", "f_b"), |
392 |
), |
|
393 | 2x |
reactable::colGroup( |
394 | 2x |
"Indicative", |
395 | 2x |
align = "right", |
396 | 2x |
columns = c("i_u_rv", "i_a", "i_b") |
397 |
) |
|
398 |
) |
|
399 |
) |
|
400 |
} |
|
401 | ||
402 |
#' @noRd |
|
403 |
cell_lt_one <- function(value) { |
|
404 | 99x |
ifelse( |
405 | 99x |
value <= 1, |
406 | 99x |
paste(round(value, 2), svg_check), |
407 | 99x |
paste(round(value, 2), svg_cross) |
408 |
) |
|
409 |
} |
|
410 | ||
411 |
#' @noRd |
|
412 |
make_style_lt_one <- function(color_pass, color_fail) { |
|
413 | 2x |
function(value) { |
414 | 101x |
if (is.logical(value)) { |
415 | 2x |
color <- ifelse(value, color_pass, color_fail) |
416 |
} else { |
|
417 | 99x |
color <- ifelse(value <= 1, color_pass, color_fail) |
418 |
} |
|
419 | 101x |
list(color = color, fontWeight = "bold") |
420 |
} |
|
421 |
} |
|
422 | ||
423 |
#' @noRd |
|
424 |
cell_type <- function(value) { |
|
425 | 31x |
value <- stringr::str_to_title(value) |
426 | 31x |
ifelse(value == "Fixed", paste(value, svg_building), paste(value, svg_box)) |
427 |
} |
|
428 | ||
429 |
#' @noRd |
|
430 |
make_style_type <- function(color_fixed, color_indicative) { |
|
431 | 2x |
function(value) { |
432 | 31x |
color <- ifelse(value == "fixed", color_fixed, color_indicative) |
433 | 31x |
list(color = color) |
434 |
} |
|
435 |
} |
|
436 | ||
437 |
svg_check <- |
|
438 |
"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-check-circle \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z\"></path>\n<path d=\"M10.97 4.97a.235.235 0 0 0-.02.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-1.071-1.05z\"></path></svg>" |
|
439 | ||
440 |
svg_cross <- |
|
441 |
"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-x-circle \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8 15A7 7 0 1 1 8 1a7 7 0 0 1 0 14zm0 1A8 8 0 1 0 8 0a8 8 0 0 0 0 16z\"></path>\n<path d=\"M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z\"></path></svg>" |
|
442 | ||
443 |
svg_building <- |
|
444 |
"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-building \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M4 2.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3 0a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1ZM4 5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1ZM7.5 5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Zm2.5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1ZM4.5 8a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Zm2.5.5a.5.5 0 0 1 .5-.5h1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-1a.5.5 0 0 1-.5-.5v-1Zm3.5-.5a.5.5 0 0 0-.5.5v1a.5.5 0 0 0 .5.5h1a.5.5 0 0 0 .5-.5v-1a.5.5 0 0 0-.5-.5h-1Z\"></path>\n<path d=\"M2 1a1 1 0 0 1 1-1h10a1 1 0 0 1 1 1v14a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V1Zm11 0H3v14h3v-2.5a.5.5 0 0 1 .5-.5h3a.5.5 0 0 1 .5.5V15h3V1Z\"></path></svg>" |
|
445 | ||
446 |
svg_box <- |
|
447 |
"<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 16 16\" class=\"bi bi-box \" style=\"height:1em;width:1em;fill:currentColor;vertical-align:-0.125em;\" aria-hidden=\"true\" role=\"img\" ><path d=\"M8.186 1.113a.5.5 0 0 0-.372 0L1.846 3.5 8 5.961 14.154 3.5 8.186 1.113zM15 4.239l-6.5 2.6v7.922l6.5-2.6V4.24zM7.5 14.762V6.838L1 4.239v7.923l6.5 2.6zM7.443.184a1.5 1.5 0 0 1 1.114 0l7.129 2.852A.5.5 0 0 1 16 3.5v8.662a1 1 0 0 1-.629.928l-7.185 2.874a.5.5 0 0 1-.372 0L.63 13.09a1 1 0 0 1-.63-.928V3.5a.5.5 0 0 1 .314-.464L7.443.184z\"></path></svg>" |
1 |
#' Filter a dataset by temporal characteristics |
|
2 |
#' |
|
3 |
#' These functions filter a dataset using features of the "date" column, as |
|
4 |
#' defined by [mqo_dict()]. Current options include the year, the month of the |
|
5 |
#' year, the day of the week, and the hour of the day. These options allow users |
|
6 |
#' to flexibly filter their data; for example, returning only weekday evenings |
|
7 |
#' in Summer months. All but [filter_year()] require short-term data; |
|
8 |
#' [filter_year()] will work out whether the data is short- or long-term based |
|
9 |
#' on the data type of the "date" column. |
|
10 |
#' |
|
11 |
#' @inheritParams summarise_mqo_stats |
|
12 |
#' |
|
13 |
#' @param years,months,wdays,hours A numeric vector indicating the years (any |
|
14 |
#' intger), months of the year (`1` to `12`), days of the week (`1` to `7`, |
|
15 |
#' where `1` is Sunday), or hours of the day (`1` to `24`) to retain in the |
|
16 |
#' data. |
|
17 |
#' |
|
18 |
#' @family data utilities |
|
19 |
#' |
|
20 |
#' @rdname filter-time |
|
21 |
#' @export |
|
22 |
filter_year <- function(data, years, dict = mqor::mqo_dict()) { |
|
23 | 1x |
if (is.numeric(data[[dict$date]])) { |
24 | ! |
id <- data[[dict$date]] %in% years |
25 |
} else { |
|
26 | 1x |
id <- lubridate::year(data[[dict$date]]) %in% years |
27 |
} |
|
28 | 1x |
return(data[id, ]) |
29 |
} |
|
30 | ||
31 |
#' @rdname filter-time |
|
32 |
#' @export |
|
33 |
filter_month <- function(data, months = 1:12, dict = mqor::mqo_dict()) { |
|
34 | 2x |
validate_filter_inputs(months, 1:12, "months") |
35 | 1x |
id <- lubridate::month(data[[dict$date]]) %in% months |
36 | 1x |
return(data[id, ]) |
37 |
} |
|
38 | ||
39 |
#' @rdname filter-time |
|
40 |
#' @export |
|
41 |
filter_wday <- function(data, wdays = 1:7, dict = mqor::mqo_dict()) { |
|
42 | 1x |
validate_filter_inputs(wdays, 1:7, "wdays") |
43 | 1x |
id <- lubridate::wday(data[[dict$date]], week_start = 7) %in% wdays |
44 | 1x |
return(data[id, ]) |
45 |
} |
|
46 | ||
47 |
#' @rdname filter-time |
|
48 |
#' @export |
|
49 |
filter_hour <- function(data, hours = 0:23, dict = mqor::mqo_dict()) { |
|
50 | 1x |
validate_filter_inputs(hours, 0:23, "hours") |
51 | 1x |
id <- lubridate::hour(data[[dict$date]]) %in% hours |
52 | 1x |
return(data[id, ]) |
53 |
} |
|
54 | ||
55 |
#' @noRd |
|
56 |
validate_filter_inputs <- function(x, allowed, field) { |
|
57 | 4x |
error <- x[!x %in% allowed] |
58 | 4x |
if (length(error) > 0 || !rlang::is_integerish(x)) { |
59 | 1x |
cli::cli_abort(c( |
60 | 1x |
"{.field {field}} should only cover the integer values {min{allowed}} to {max{allowed}}.", |
61 | 1x |
"Bad values: {error}" |
62 |
)) |
|
63 |
} |
|
64 |
} |
1 |
#' Get path to `{mqor}` example files |
|
2 |
#' |
|
3 |
#' `{mqor}` comes bundled with some sample datasets in its `inst/demo_files` |
|
4 |
#' directory. These are identical to [demo_shortterm] and [demo_longterm], and |
|
5 |
#' can be read using [read_mqor()]. This function makes these files easy to |
|
6 |
#' access. |
|
7 |
#' |
|
8 |
#' @param file Path to file. If `NULL`, the example files will be listed. |
|
9 |
#' @seealso [demo_shortterm], [download_demo_files()] |
|
10 |
#' @export |
|
11 |
#' @examples |
|
12 |
#' demo_files() |
|
13 |
#' demo_files("demo_longterm.csv") |
|
14 |
demo_files <- function(file = NULL) { |
|
15 | 4x |
opts <- dir(system.file("demo_files", package = "mqor")) |
16 | 4x |
if (is.null(file)) { |
17 | 1x |
return(opts) |
18 |
} else { |
|
19 | 3x |
file <- rlang::arg_match(file, opts, multiple = FALSE) |
20 | 3x |
system.file("demo_files", file, package = "mqor", mustWork = TRUE) |
21 |
} |
|
22 |
} |
|
23 | ||
24 |
#' Download larger example files from GitLab |
|
25 |
#' |
|
26 |
#' As well as the simple files found via [demo_files()], `{mqor}` provides more |
|
27 |
#' complex data which is too large to bundle with an R package. This function |
|
28 |
#' downloads these files to a directory. These files are the demo dataset found |
|
29 |
#' in the DELTA tool, reformatted for [read_mqor()]. |
|
30 |
#' |
|
31 |
#' @param dir The destination directory for the files. |
|
32 |
#' |
|
33 |
#' @param files A vector for the specific files to download. |
|
34 |
#' |
|
35 |
#' @seealso [demo_files()] |
|
36 |
#' |
|
37 |
#' @export |
|
38 |
download_demo_files <- function( |
|
39 |
dir, |
|
40 |
files = c("delta_attributes.csv", "delta_shortterm.csv", "delta_longterm.csv") |
|
41 |
) { |
|
42 | ! |
urlstr <- "https://code.europa.eu/jrcairqualitymodelling/mqor/-/raw/main/data-delta/FILE?ref_type=heads" |
43 | ||
44 | ! |
files <- rlang::arg_match(files, multiple = TRUE) |
45 | ||
46 | ! |
if (!dir.exists(dir)) { |
47 | ! |
dir.create(dir) |
48 |
} |
|
49 | ! |
for (i in files) { |
50 | ! |
utils::download.file( |
51 | ! |
url = gsub("FILE", i, urlstr), |
52 | ! |
destfile = file.path(dir, i) |
53 |
) |
|
54 |
} |
|
55 |
} |
|
56 | ||
57 |
#' Simple demonstration datasets taken from the CEN technical specification |
|
58 |
#' |
|
59 |
#' These two datasets contain dummy data outlined in Annex G of *Ambient Air — |
|
60 |
#' Definition and use of modelling quality objectives for air quality |
|
61 |
#' assessment*. These data are mostly useful for demonstrating `mqor` |
|
62 |
#' functionality for teaching and learning purposes. `demo_shortterm` should be |
|
63 |
#' used with `mqo_params(term = "short")` and `demo_longterm` with |
|
64 |
#' `mqo_params(term = "long")`. Note that `demo_shortterm` only has 5 rows of |
|
65 |
#' data for expedience; real datasets should have periodic data representing a |
|
66 |
#' whole year at least. |
|
67 |
#' |
|
68 |
#' \describe{ |
|
69 |
#' \item{site}{Site identifiers; S1 through S15.} |
|
70 |
#' \item{type}{Site type; all are 'fixed' butcould also be 'indicative'.} |
|
71 |
#' \item{param}{The pollutant (in this case "PM10")} |
|
72 |
#' \item{date}{The datetime of the measurement. An integer in `demo_longterm`, and a Date in `demo_shortterm`.} |
|
73 |
#' \item{obs}{Observed (measured) values.} |
|
74 |
#' \item{mod}{Modelled values.} |
|
75 |
#' } |
|
76 |
#' |
|
77 |
#' @rdname demo-data |
|
78 |
#' @seealso [demo_files()] |
|
79 |
#' @order 1 |
|
80 |
#' @source Ambient Air — Definition and use of modelling quality objectives for |
|
81 |
#' air quality assessment (TC 264 WI 00264248:2025(E)) |
|
82 |
#' |
|
83 |
#' @examples |
|
84 |
#' demo_shortterm |
|
85 |
"demo_shortterm" |
|
86 | ||
87 |
#' @rdname demo-data |
|
88 |
#' @order 2 |
|
89 |
#' |
|
90 |
#' @examples |
|
91 |
#' demo_longterm |
|
92 |
"demo_longterm" |
1 |
#' Format DELTA-formatted files in a way needed by `{mqor}` |
|
2 |
#' |
|
3 |
#' This function ingests monitoring and modeling data, as well as site metadata |
|
4 |
#' contained within the `startup.ini` file, and returns a `data.frame` in an |
|
5 |
#' appropriate format for other `{mqor}` functions (e.g., |
|
6 |
#' [summarise_mqo_stats()]). |
|
7 |
#' |
|
8 |
#' @param monitoring A `data.frame` imported using [read_delta_data_delim()] |
|
9 |
#' containing monitoring data. |
|
10 |
#' |
|
11 |
#' @param modeling One or more `data.frames` imported using |
|
12 |
#' [read_delta_data_delim()] or [read_delta_data_cdf()] containing modeling |
|
13 |
#' data. Multiple objects should be provided as a list. |
|
14 |
#' |
|
15 |
#' @param startup The object returned by [read_delta_resource()] when `file = |
|
16 |
#' "startup_*.ini"` |
|
17 |
#' |
|
18 |
#' @param data_type Whether to label the DELTA input data as representing |
|
19 |
#' `"fixed"` or `"indicative"` measurements. Defaults to `"fixed"`. |
|
20 |
#' |
|
21 |
#' @export |
|
22 |
fmt_delta_for_mqor <- function( |
|
23 |
monitoring, |
|
24 |
modeling, |
|
25 |
startup, |
|
26 |
data_type = c("fixed", "indicative") |
|
27 |
) { |
|
28 | ! |
data_type <- rlang::arg_match(data_type) |
29 | ||
30 | ! |
joined <- |
31 | ! |
dplyr::left_join( |
32 | ! |
monitoring, |
33 | ! |
dplyr::bind_rows(modeling), |
34 | ! |
dplyr::join_by("date", "site", "param") |
35 |
) |
|
36 | ||
37 | ! |
meta <- startup$monitoring |
38 | ! |
names(meta)[names(meta) == "station_name"] <- "site" |
39 | ||
40 | ! |
joined <- |
41 | ! |
dplyr::left_join(joined, meta, dplyr::join_by("site")) |> |
42 | ! |
dplyr::mutate(type = data_type, .after = "site") |
43 | ||
44 | ! |
return(joined) |
45 |
} |
1 |
#' Launch the `{mqor}` Shiny Interface |
|
2 |
#' |
|
3 |
#' This app runs [shiny::shinyAppDir()] to launching a web interface built into |
|
4 |
#' `{mqor}` which allows users to use certain functions without needing to write |
|
5 |
#' any R scripts. |
|
6 |
#' |
|
7 |
#' @inheritParams shiny::shinyAppDir |
|
8 |
#' |
|
9 |
#' @author Jack Davison |
|
10 |
#' |
|
11 |
#' @export |
|
12 |
launch_app <- function(options = list()) { |
|
13 | ! |
rlang::check_installed(c( |
14 | ! |
"shiny", |
15 | ! |
"bslib", |
16 | ! |
"plotly", |
17 | ! |
"leaflet", |
18 | ! |
"rlang", |
19 | ! |
"reactable" |
20 |
)) |
|
21 | ! |
shiny::shinyAppDir( |
22 | ! |
appDir = system.file("shiny", package = "mqor"), |
23 | ! |
options = options |
24 |
) |
|
25 |
} |