R/scenario.R
scenario_family.Rd
A "scenario" identifies a single simulation run. Field names and values are discovered from the driver queue (Google Sheet); they are not hardcoded in this package. The same run can be referred to in three interchangeable ways:
Field values (one column per field in the queue), e.g.
(.ELFind = "6.3.1", .samplingRange = 2071:2100, ...).
An output directory path under outputs/.
An upload tar filename (path with / -> _ and .tar.gz suffix).
This file defines:
a canonical record (S3 class "scenario");
the generic as_scenario() for coercing any representation into it;
formatters as_path() / as_tarname() for going back;
default builders pathBuild() / pathParse(): each non-empty
field's value (no label) is one path segment, joined by /,
in the order given by scenarioFields(). Integer-and-contiguous
vectors render as start-end. Empty / NA fields are skipped
entirely (one fewer segment); see pathParse() for the
trailing-NA round-trip caveat.
Per-project format overrides: define your own pathBuild (and
matching pathParse) in the global environment, or register them
explicitly with register_scenario_format(). Lookup order, highest
first: register_scenario_format slot -> a pathBuild/pathParse
in the global environment -> the package default.
Field discovery: queueRead() caches the queue's non-meta column
names as the active field set. Subsequent pathParse() calls use
those labels for positional decoding. If you parse paths without
first reading a queue, pass fields = c(...) explicitly (or call
scenarioFieldsSet()).
if (FALSE) { # \dontrun{
## --- Default (generic) format -----------------------------------------
# NOTE: `ss_id` and `.uploadGSdir` are used below but never defined in this
# example; presumably they identify the project's queue folder and upload
# folder -- TODO confirm, and substitute your own values before running.
queue <- queueRead(folder = ss_id, name = "longRuns")
# -> data.table with columns .ELFind, .samplingRange, .GCM, .SSP, .rep
# plus meta columns (status, started_at, ...). Non-meta columns are
# auto-cached as scenarioFields().
scens <- as_scenario(queue) # list of `scenario` objects
# With the default format every field value becomes one path segment.
as_path(scens[[1]])
#> "outputs/6.3.1/2071-2100/CNRM-ESM2-1/370/5"
# The tar name is the same sequence joined by "_" with a .tar.gz suffix
# and, as the output shows, without the leading "outputs" segment.
as_tarname(scens[[1]])
#> "6.3.1_2071-2100_CNRM-ESM2-1_370_5.tar.gz"
# Round-trip
s2 <- as_scenario("outputs/6.3.1/2071-2100/CNRM-ESM2-1/370/5")
identical(unclass(scens[[1]]), unclass(s2)) # TRUE
# Cross-reference queue against uploaded tarballs
# (`uploads` is shown for illustration; it is not used again in this example.)
uploads <- outScenarios(.uploadGSdir) # list of scenarios
# NOTE(review): `missing` shares its name with base::missing(); a more
# specific name is advisable in real code.
missing <- queueUploadMissing(folder = ss_id, name = "longRuns",
uploadFolder = .uploadGSdir) # queue rows only
## --- Per-field labels in the path -------------------------------------
# `withFieldLabel` accepts two forms.
# 1) Unnamed character vector: prefix with the field name itself.
as_path(scens[[1]], withFieldLabel = c(".rep", ".SSP"))
#> "outputs/6.3.1/2071-2100/CNRM-ESM2-1/.SSP370/.rep5"
# 2) Named character vector: prefix with the *mapped* label
# (e.g., emit `.rep` as `rep`, `.SSP` as `_ssp`).
as_path(scens[[1]], withFieldLabel = c(.rep = "rep", .SSP = "_ssp"))
#> "outputs/6.3.1/2071-2100/CNRM-ESM2-1/_ssp370/rep5"
# Set once for every subsequent as_path() / as_tarname():
# (this changes session-wide state, so the outputs below no longer match
# the default-format outputs shown earlier in this example)
register_scenario_format(withFieldLabel = c(.rep = "rep", .SSP = "_ssp"))
as_path(scens[[1]])
#> "outputs/6.3.1/2071-2100/CNRM-ESM2-1/_ssp370/rep5"
# In the tar name the "_ssp" label follows the "_" separator, hence "__ssp".
as_tarname(scens[[1]])
#> "6.3.1_2071-2100_CNRM-ESM2-1__ssp370_rep5.tar.gz"
# Round-trip parses back to canonical fields:
# (presumably relies on the labels registered just above -- TODO confirm)
as_scenario("outputs/6.3.1/2071-2100/CNRM-ESM2-1/_ssp370/rep5")
## --- Project-specific format (FireSenseTesting layout) ----------------
# Layout: outputs/<.ELFind>/<range>/<GCM>_ssp<SSP>/rep<.rep>
# E.g. outputs/6.3.1/2071-2100/CNRM-ESM2-1_ssp370/rep5
# Build the project-specific output path for one scenario:
#   <pre>/<.ELFind>/<start>-<end>/<.GCM>[_ssp<.SSP>]/rep<.rep>
#
# Arguments:
#   .ELFind         Value used as the first segment after `pre`.
#   .samplingRange  Numeric vector (only its min and max are used), or a
#                   string describing one, e.g. "2071:2100".
#   .GCM            Value of the GCM field; "_ssp<.SSP>" is appended to it
#                   unless `.SSP` is NA.
#   .SSP            Value of the SSP field, or NA to omit the "_ssp" suffix.
#   .rep            Value rendered as "rep<.rep>" in the last segment.
#   pre             Root segment prepended to the path.
#
# Returns: the path assembled with file.path().
myBuild <- function(.ELFind, .samplingRange, .GCM, .SSP, .rep,
                    pre = "outputs") {
  sr <- if (is.numeric(.samplingRange)) {
    .samplingRange
  } else if (length(.samplingRange) == 1L &&
             grepl("^\\s*[0-9]{1,9}\\s*:\\s*[0-9]{1,9}\\s*$",
                   .samplingRange)) {
    # Literal "start:end" text: read the two integers directly instead of
    # evaluating the string as R code. Only the endpoints are needed because
    # range() is applied below.
    bounds <- strsplit(as.character(.samplingRange), ":", fixed = TRUE)[[1L]]
    as.integer(trimws(bounds))
  } else {
    # NOTE(review): this evaluates arbitrary text as R code. If the value can
    # come from a shared queue sheet, treat it as untrusted input and replace
    # this fallback with an explicit parser for the forms you expect.
    eval(parse(text = .samplingRange))
  }

  # ifelse() is kept (rather than if/else) so a zero-length `.SSP` still
  # yields an empty suffix instead of an error.
  ssp_suffix <- ifelse(is.na(.SSP), "", paste0("_ssp", .SSP))

  file.path(pre, .ELFind,
            paste(range(sr), collapse = "-"),
            paste0(.GCM, ssp_suffix),
            paste0("rep", .rep))
}
# Parse a project-specific output path or upload tar name back into fields.
#
# Accepted inputs (both produced by the matching builder layout):
#   <pre>/<.ELFind>/<start>-<end>/<.GCM>[_ssp<.SSP>]/rep<.rep>
#   <.ELFind>_<start>-<end>_<.GCM>[_ssp<.SSP>]_rep<.rep>.tar.gz
#
# Arguments:
#   path    A single path or tar file name.
#   fields  Not referenced in this body; presumably kept so the signature
#           matches the package's default parser -- TODO confirm. Because it
#           is never evaluated, its default is never called.
#   pre     Leading root segment to strip, compared literally (not as a
#           regular expression).
#
# Returns: a list with elements .ELFind (character), .samplingRange (integer
# sequence start:end), .GCM (character, NA if absent), .SSP (character, NA
# if absent) and .rep (integer).
#
# Errors if `path` does not contain exactly one "<start>-<end>" segment
# followed, later on, by exactly one "rep<N>" segment.
myParse <- function(path, fields = scenarioFields(), pre = "outputs") {
  if (length(path) != 1L || is.na(path)) {
    stop("`path` must be a single non-missing string.", call. = FALSE)
  }
  clean <- sub("\\.tar\\.gz$", "", path)

  # Strip a leading "<pre>/" or "<pre>_" by literal comparison, so that
  # regex metacharacters in `pre` (".", "+", "(", ...) cannot change the match.
  n_pre <- nchar(pre)
  if (startsWith(clean, pre) &&
      substr(clean, n_pre + 1L, n_pre + 1L) %in% c("/", "_")) {
    clean <- substring(clean, n_pre + 2L)
  }

  # Directory paths are split on "/", tar names (no "/") on "_".
  sep <- if (grepl("/", clean, fixed = TRUE)) "/" else "_"
  parts <- strsplit(clean, sep, fixed = TRUE)[[1L]]

  repIdx <- grep("^rep[0-9]+$", parts)
  rangeIdx <- grep("^[0-9]+-[0-9]+$", parts)
  if (length(rangeIdx) != 1L || length(repIdx) != 1L) {
    stop(sprintf(paste0("Cannot parse '%s': expected exactly one ",
                        "<start>-<end> segment and one rep<N> segment."),
                 path),
         call. = FALSE)
  }
  if (repIdx <= rangeIdx) {
    stop(sprintf("Cannot parse '%s': rep<N> must come after <start>-<end>.",
                 path),
         call. = FALSE)
  }

  # Everything strictly between the range and rep segments is the GCM (+SSP).
  # seq_len() gives an empty index when they are adjacent, whereas a:b would
  # count backwards and pick up the wrong segments.
  between <- rangeIdx + seq_len(repIdx - rangeIdx - 1L)
  gs <- if (length(between) == 0L) {
    c(NA_character_, NA_character_)
  } else {
    gcmSsp <- paste(parts[between], collapse = "_")
    if (grepl("_ssp", gcmSsp, fixed = TRUE)) {
      strsplit(gcmSsp, "_ssp", fixed = TRUE)[[1L]]
    } else {
      c(gcmSsp, NA_character_)
    }
  }

  rng <- as.integer(strsplit(parts[rangeIdx], "-", fixed = TRUE)[[1L]])

  list(.ELFind = paste(parts[seq_len(rangeIdx - 1L)], collapse = "_"),
       .samplingRange = rng[1L]:rng[2L],
       .GCM = gs[1L],
       .SSP = gs[2L],
       .rep = as.integer(sub("^rep", "", parts[repIdx])))
}
# Register the project-specific builder/parser pair; the as_path() and
# as_tarname() outputs below follow the myBuild layout.
register_scenario_format(build = myBuild, parse = myParse)
as_path(scens[[1]])
#> "outputs/6.3.1/2071-2100/CNRM-ESM2-1_ssp370/rep5"
as_tarname(scens[[1]])
#> "6.3.1_2071-2100_CNRM-ESM2-1_ssp370_rep5.tar.gz"
# Equivalent: define pathBuild / pathParse in your global environment
# (e.g. in a project global.R) -- they will be auto-detected.
# NOTE: per the lookup order described at the top of this page, a format
# registered with register_scenario_format() takes precedence over global
# pathBuild / pathParse, so use one mechanism or the other, not both.
pathBuild <- myBuild
pathParse <- myParse
} # }