diff --git a/CHANGELOG.md b/CHANGELOG.md index cb5f9dec1d5..a5cfef12f01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha `MCMCpack`, `mvtnorm`, `neonUtilities`, `neonstore`, `PEcAn.benchmark`, `PEcAn.visualization`, `rjags`, `sirt`, and `sp` from `Imports` to `Suggests` (@omkarrr2533, #3599). +- Management events specified via `events.json` are now required to specify a crop code for each planting event, so that models can know when to restart with a different PFT (#3828, #3836). diff --git a/base/remote/R/check_model_run.R b/base/remote/R/check_model_run.R index c74184d7f01..2c733d875e5 100644 --- a/base/remote/R/check_model_run.R +++ b/base/remote/R/check_model_run.R @@ -10,9 +10,9 @@ check_model_run <- function(out, stop.on.error = TRUE) { success <- FALSE msg <- paste0("Model run aborted with the following error:\n", out) if (stop.on.error) { - PEcAn.logger::logger.severe(msg) + PEcAn.logger::logger.severe(msg, wrap = FALSE) } else { - PEcAn.logger::logger.error(msg) + PEcAn.logger::logger.error(msg, wrap = FALSE) } } else { success <- TRUE diff --git a/book_source/03_topical_pages/02_pecan_standards.Rmd b/book_source/03_topical_pages/02_pecan_standards.Rmd index b4e81427cb7..29605b65b7e 100644 --- a/book_source/03_topical_pages/02_pecan_standards.Rmd +++ b/book_source/03_topical_pages/02_pecan_standards.Rmd @@ -97,6 +97,86 @@ See the [Soil Data] section on more into on creating a standard soil data file. See the [Vegetation Data] section on more info on creating a standard vegetation data file +### Management events + +Externally imposed changes in system state, such as planting and harvest or tillage of crop systems, are represented as a sequence of "events" that are each defined by a date, event type, and one or more event-specific properties that describe the kind and magnitude of change imposed. 
PEcAn passes the same representation of a given event to all models, but interpretation of all properties -- including dates and event types -- is necessarily model-specific and could include ignoring an event entirely. + +Event types are defined by the PEcAn events spec, which is currently provided as a JSON schema file bundled into the PEcAn.data.land package (TODO consider exposing this in a more linkable format?). Valid events files are stored as JSON, with each events file containing an array of sites. Each site contains an id, a schema version, and an array of events, usually recorded in date order. Each event has a date, an event type, and one or more values from the required and optional properties that are documented below for each event type. Any event is also allowed to contain arbitrary additional properties not mentioned in the spec; PEcAn will pass these on to models with no further validation. + +The currently supported event types are: planting, harvest, irrigation, fertilization, and tillage. Each of these is described briefly below. You will note that all of them are focused on agronomic management of croplands; future work will extend this framework to include disturbance of unmanaged systems such as fire, inundation or insect outbreaks. + + +#### planting + +Addition of live biomass that is expected to grow according to the model's plant growth rules. The current spec expects a "planting" to be a transplantation of leafed-out seedlings; models which represent planting as addition of unsprouted seeds might choose to adjust the date to account for germination time. + +Required properties: + +* `crop_code`: a machine-readable identifier that specifies what type of plant was added. Should preferably be short and use only characters that are safe in filenames. +* `leaf_c_kg_m2`: Amount of leaf biomass added. + +Optional properties: + +* `wood_c_kg_m2`: Biomass added as aboveground stems. Assumed zero if not specified. 
+* `fine_root_c_kg_m2`: Biomass added to belowground roots. Assumed zero if not specified. +* `cultivar`: Identifier for finer-grained plant type identifiers that will be meaningful to models with detailed growth parameterizations but likely ignored by PFT-based models. +* `crop_display`: A human-readable name for the added plant type. + +#### harvest + +Removal of biomass from the living pool, either by transferring it to dead (litter) biomass or by removing it from the system. Specified as a fraction of live biomass. + +Required properties: +* `frac_above_removed_0to1`: Fraction of existing aboveground biomass removed from the system. + +Optional properties: + +* `frac_below_removed_0to1`: Fraction of existing belowground biomass removed from the system. Assumed zero if not specified. +* `frac_above_to_litter_0to1`, `frac_below_to_litter_0to1`: fraction of existing above- or below-ground live biomass converted to dead litter. Assumed zero if not specified. +* `crop_code`: A machine-readable identifier that specifies what type of plant was harvested. +* `cultivar`: Identifier for finer-grained plant type identifiers that will be meaningful to models with detailed growth parameterizations but likely ignored by PFT-based models. +* `crop_display`: A human-readable name for the harvested plant type. + +#### irrigation + +Addition of water to the system beyond that specified as precipitation in met files. Note that only the total amount of water is specified and not the application rate; assumptions about the duration of the event will be model-specific. + +Required properties: + +* `method`: How water was applied. One of "soil", "canopy", "flood". +* `amount_mm`: Amount of water applied. + +Optional properties: + +* `immed_evap_frac_0to1`: fraction of the applied water that evaporates before reaching the soil water pool. + +#### fertilization + +Addition of exogenous nutrients, including non-crop carbon (e.g. manure, compost). 
Nutrients other than N and C are not currently included but may be added in future schemas. + +Required properties: + +* at least one of `org_c_kg_m2`, `nh4_n_kg_m2`, `no3_n_kg_m2`, which are respectively the amount of organic C, ammonium N, and nitrate N added. + +Optional properties: + +* any of `org_c_kg_m2`, `nh4_n_kg_m2`, `no3_n_kg_m2` not already counted as required. All are assumed zero if not specified. +* `org_n_kg_m2`: the amount of N added in organic compounds. Assumed zero if not specified. If specifying, you probably want to specify `org_c_kg_m2` as well but the schema does not enforce this. + +#### tillage + +Physical disturbance of the soil and litter pools. Models differ greatly in their representation of tillage effects and this spec intentionally limits its representation to a simple intensity score whose maximum should be interpreted as "the most complete possible mixing/disturbance." Whether this is sufficient detail will be re-evaluated in future revisions. + +Required properties: + +* `tillage_eff_0to1`: Relative tillage intensity, with 0 meaning no disturbance at all and 1 meaning the most complete disturbance possible. + +Optional properties: + +* `intensity_category`: string giving additional information on the type or amount of tillage performed. +* `depth_m`: Depth of soil affected by the event. 
+ + ## Output Standards * created by `model2netcdf` functions diff --git a/models/sipnet/NAMESPACE b/models/sipnet/NAMESPACE index efa0bc19570..376a4eb12cb 100644 --- a/models/sipnet/NAMESPACE +++ b/models/sipnet/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(combine_sipnet_out) export(mergeNC) export(met2model.SIPNET) export(model2netcdf.SIPNET) diff --git a/models/sipnet/R/combine_sipnet_out.R b/models/sipnet/R/combine_sipnet_out.R new file mode 100644 index 00000000000..8b73b1bef1c --- /dev/null +++ b/models/sipnet/R/combine_sipnet_out.R @@ -0,0 +1,36 @@ +#!/usr/bin/env Rscript + +#' Combine a bunch of `sipnet.out` files into a single file +#' +#' @param directory Parent directory to search for `sipnet.out` files. You must +#' provide either this or an explicit vector of `files`. +#' @param outfile File to which combined `sipnet.out` will be written +#' @param files Optional vector of paths to files to combine. All must be +#' readable with [read_sipnet_out()]. If `NULL` (default), looks for files +#' named `sipnet.out` in `directory` and its subdirectories, recursively. +#' +#' @return `outfile` (path to output file), invisibly +#' @export +combine_sipnet_out <- function(directory, outfile, files = NULL) { + if (missing(directory) && is.null(files)) { + PEcAn.logger::logger.severe("Must provide either `directory` or `files`") + } + if (is.null(files)) { + # NOTE that this expects file paths (including parent directories) to be + # lexicographically sorted. For the common case of segmented SIPNET runs, + # the parent directories are named `segment_001`, `segment_002`, etc., so + # this will work automatically. + # If you don't want to make this assumption, or have a custom sort order, + # pass `files` directly to this function. 
+ files <- sort(list.files(directory, "sipnet\\.out", full.names = TRUE, recursive = TRUE)) + } + if (length(files) == 0) { + PEcAn.logger::logger.severe("No files provided; nothing to combine.") + } + flist <- lapply(files, read_sipnet_out) + combined <- do.call(rbind.data.frame, flist) + # Mimic the SIPNET fixed-width right-aligned format + combined_fwf <- format(combined, justify = "right") + utils::write.table(combined_fwf, outfile, row.names = FALSE, col.names = TRUE, quote = FALSE, sep = " ") + invisible(outfile) +} diff --git a/models/sipnet/R/model2netcdf.SIPNET.R b/models/sipnet/R/model2netcdf.SIPNET.R index 5d96f615fee..9ea15e48d15 100644 --- a/models/sipnet/R/model2netcdf.SIPNET.R +++ b/models/sipnet/R/model2netcdf.SIPNET.R @@ -13,6 +13,7 @@ #' } #' @export mergeNC #' @name mergeNC +#' @importFrom rlang .data #' @source https://github.com/RS-eco/processNC/blob/main/R/mergeNC.R mergeNC <- function( ##title<< Aggregate data in netCDF files @@ -61,38 +62,7 @@ model2netcdf.SIPNET <- function(outdir, sitelat, sitelon, start_date, end_date, overwrite = FALSE, conflict = FALSE) { ### Read in model output in SIPNET format sipnet_out_file <- file.path(outdir, prefix) - # SIPNET v1 had a "Notes" comment line before the header; v2 removed it. - # if the first line starts with "year", there is no Notes line. - first_line <- readLines(sipnet_out_file, n = 1) - skip_n <- if (grepl("^year", first_line)) 0 else 1 - # Temporary workaround until - # https://github.com/PecanProject/sipnet/issues/304 is resolved. - sipnet_output <- tryCatch({ - utils::read.table(sipnet_out_file, header = TRUE, skip = skip_n, sep = "") - }, error = function(err) { - PEcAn.logger::logger.warn( - "Failed to read using `read.table`. ", - "Trying to parse output manually." - ) - raw_lines <- readLines(sipnet_out_file) - raw_header <- raw_lines[[1 + skip_n]] - raw_body <- utils::tail(raw_lines, -(1 + skip_n)) - # SIPNET output is right-aligned with the column names in the header. 
- # We use this to figure out where the numbers end if there are no spaces. - token_matches <- gregexpr("\\S+", raw_header, perl = TRUE) - proc_header <- regmatches(raw_header, token_matches)[[1]] - col_ends <- token_matches[[1]] + attr(token_matches[[1]], "match.length") - 1 - col_starts <- c(1, utils::head(col_ends, -1) + 1) - col_widths <- col_ends - col_starts + 1 - result <- utils::read.fwf( - textConnection(raw_body), - widths = col_widths, - col.names = proc_header, - na.strings = c("nan", "-nan") - ) - result[] <- lapply(result, as.numeric) - result - }) + sipnet_output <- read_sipnet_out(sipnet_out_file) #sipnet_output_dims <- dim(sipnet_output) ### Determine number of years and output timestep diff --git a/models/sipnet/R/read_sipnet_out.R b/models/sipnet/R/read_sipnet_out.R new file mode 100644 index 00000000000..457a965803c --- /dev/null +++ b/models/sipnet/R/read_sipnet_out.R @@ -0,0 +1,40 @@ +#' Read `sipnet.out` file to `data.frame` +#' +#' @param sipnet_out_file Path to `sipnet.out` file +#' +#' @return `data.frame` of SIPNET output +read_sipnet_out <- function(sipnet_out_file) { + # SIPNET v1 had a "Notes" comment line before the header; v2 removed it. + # if the first line starts with "year", there is no Notes line. + first_line <- readLines(sipnet_out_file, n = 1) + skip_n <- if (grepl("^year", first_line)) 0 else 1 + # Temporary workaround until + # https://github.com/PecanProject/sipnet/issues/304 is resolved. + sipnet_output <- tryCatch({ + utils::read.table(sipnet_out_file, header = TRUE, skip = skip_n, sep = "") + }, error = function(err) { + PEcAn.logger::logger.warn( + "Failed to read using `read.table`. ", + "Trying to parse output manually." + ) + raw_lines <- readLines(sipnet_out_file) + raw_header <- raw_lines[[1 + skip_n]] + raw_body <- utils::tail(raw_lines, -(1 + skip_n)) + # SIPNET output is right-aligned with the column names in the header. + # We use this to figure out where the numbers end if there are no spaces. 
+ token_matches <- gregexpr("\\S+", raw_header, perl = TRUE) + proc_header <- regmatches(raw_header, token_matches)[[1]] + col_ends <- token_matches[[1]] + attr(token_matches[[1]], "match.length") - 1 + col_starts <- c(1, utils::head(col_ends, -1) + 1) + col_widths <- col_ends - col_starts + 1 + result <- utils::read.fwf( + textConnection(raw_body), + widths = col_widths, + col.names = proc_header, + na.strings = c("nan", "-nan") + ) + result[] <- lapply(result, as.numeric) + result + }) + sipnet_output +} diff --git a/models/sipnet/R/split_inputs.SIPNET.R b/models/sipnet/R/split_inputs.SIPNET.R index 56aa09f786d..6e40bf9fe56 100644 --- a/models/sipnet/R/split_inputs.SIPNET.R +++ b/models/sipnet/R/split_inputs.SIPNET.R @@ -1,22 +1,108 @@ -## split clim file into smaller time units to use in KF -##' @title split_inputs.SIPNET -##' @name split_inputs.SIPNET -##' @author Mike Dietze and Ann Raiho -##' -##' @param start.time start date and time for each SDA ensemble -##' @param stop.time stop date and time for each SDA ensemble -##' @param inputs list of model inputs to use in write.configs.SIPNET -##' @param overwrite Default FALSE -##' @param outpath if specified, write output to a new directory. Default NULL writes back to the directory being read -##' @description Splits climate met for SIPNET -##' -##' @return file split up climate file -##' @importFrom rlang .data -##' @importFrom dplyr %>% -##' @export +#!/usr/bin/env Rscript + +#' Split SIPNET inputs into multiple files based on start and end time +#' +#' Subset each SIPNET input file and write a new file containing values `>= +#' start.time` and `< end.time` (note: end time is *not* inclusive) +#' +#' NOTE that sipnet met files contain dates _and_ times, while sipnet event +#' files contain only dates. Comparing a datetime to a date will coerce the +#' date to midnight UTC. 
+#' +#' @param start.time Start date or datetime for splitting +#' @param stop.time End date or datetime for splitting +#' @param inputs Named `inputs` list as provided by PEcAn `settings`. Must have +#' structure like: +#' `list(met = list(path = "path/to/sipnet.clim"), events = list(path = "path/to/events.in"), ...)` +#' +#' @inheritParams split_sipnet_met +#' @author Alexey Shiklomanov +#' +#' @return Modified `inputs` list with all `path` entries replaced with new +#' paths (suitable for inserting back into `settings$run$inputs`). +#' @export split_inputs.SIPNET <- function(start.time, stop.time, inputs, overwrite = FALSE, outpath = NULL) { - #### Get met paths - met <- inputs + result <- inputs + if ("met" %in% names(result)) { + result[["met"]][["path"]] <- split_sipnet_met( + start.time, + stop.time, + result$met$path, + overwrite = overwrite, + outpath = outpath + ) + } + + if ("events" %in% names(result)) { + result[["events"]][["path"]] <- split_sipnet_events( + start.time, + stop.time, + result$events$path, + overwrite = overwrite, + outpath = outpath + ) + } + result +} + +#' Split sipnet `events.in` files according to start and stop date +#' +#' @param eventfile Path to `events.in` file. +#' @inheritParams split_inputs.SIPNET +split_sipnet_events <- function(start.time, stop.time, eventfile, overwrite = FALSE, outpath = NULL) { + # Read events.in + prefix <- sub(".in", "", basename(eventfile), fixed = TRUE) + outpath <- outpath %||% dirname(eventfile) + dir.create(outpath, recursive = TRUE, showWarnings = FALSE) + + #Changing the name of the files, so it would contain the name of the hour as well. 
+ formatted_start <- strftime(start.time) + formatted_stop <- strftime(stop.time) + outfile <- file.path(outpath, paste0(prefix, ".", formatted_start, "-", formatted_stop, ".in")) + names(outfile) <- paste(start.time, "-", stop.time) + + if (file.exists(outfile) && !overwrite) { + PEcAn.logger::logger.warn( + outfile, + " already exists and overwrite is FALSE, so keeping existing file." + ) + return(outfile) + } + + # We can't use `read.table` or similar here because events file rows have + # different numbers of columns depending on event type. Instead, we just + # filter based on the event date, which is always the first two columns (year + # and doy). + events_in_raw <- readLines(eventfile) + events_in_list <- strsplit(events_in_raw, "[[:space:]]+") + years_in <- vapply(events_in_list, \(x) as.integer(x[[1]]), integer(1)) + doys_in <- vapply(events_in_list, \(x) as.integer(x[[2]]), integer(1)) + # Not using sipnet2datetime here because it returns times with time zones, + # which could cause subtle timezone-related bugs + dates_in <- as.Date(sprintf("%d-01-01", years_in)) + (doys_in - 1) + idx_keep <- (dates_in >= start.time) & (dates_in < stop.time) + if (length(idx_keep) == 0) { + PEcAn.logger::logger.warn("No events to keep, so `events.in` will be empty") + } + events_out_str <- events_in_raw[idx_keep] + writeLines(events_out_str, outfile) + invisible(outfile) +} + +##' Extract subset of a sipnet clim file based on start and end time +##' +##' @author Mike Dietze, Ann Raiho, Alexey Shiklomanov +##' +##' @param start.time start date and time at which to start extraction +##' @param stop.time stop date and time at which to stop extraction +##' @param met path to sipnet clim file to be split +##' @param overwrite if `TRUE`, overwrite existing target file (Default `FALSE`) +##' @param outpath if specified, write output to a new directory. 
Default `NULL` writes back to the directory being read +##' +##' @return path to split up climate file +split_sipnet_met <- function(start.time, stop.time, met, overwrite = FALSE, outpath = NULL) { + start.time <- coerce_to_datetime(start.time) + stop.time <- coerce_to_datetime(stop.time) path <- dirname(met) prefix <- sub(".clim", "", basename(met), fixed = TRUE) if(is.null(outpath)){ @@ -33,7 +119,11 @@ split_inputs.SIPNET <- function(start.time, stop.time, inputs, overwrite = FALSE formatted_stop <- gsub(' ',"_", as.character(stop.time)) file <- paste0(outpath, "/", prefix, ".", formatted_start, "-", formatted_stop, ".clim") - if(file.exists(file) & !overwrite){ + if(file.exists(file) && !overwrite){ + PEcAn.logger::logger.warn( + file, + " already exists and overwrite is FALSE, so keeping existing file." + ) return(file) } @@ -60,3 +150,21 @@ split_inputs.SIPNET <- function(start.time, stop.time, inputs, overwrite = FALSE #settings$run$inputs$met$path <- file return(file) } # split_inputs.SIPNET + +coerce_to_datetime <- function(x) { + if (inherits(x, "POSIXt")) { + return(x) + } + xname <- deparse(substitute(x)) + if (!inherits(x, "Date")) { + PEcAn.logger::logger.severe( + "Invalid", xname, ":", x, + "(class:", class(x), ")" + ) + } + PEcAn.logger::logger.debug( + xname, "is a date, but this function expects a datetime.", + "Coercing to datetime by setting to midnight UTC." + ) + as.POSIXct(x, tz = "UTC") +} diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index 8c7cf038246..9212f2ce8cf 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -64,8 +64,10 @@ #' #' @param defaults nested list of named constant parameter values. The #' structure is `list(list(constants = list(trait1 = , trait2 = , ...)))`. -#' Only `defaults[[1]]$constants` is used; all other elements are silently ignored. 
-#' @param trait.values vector of samples for a given trait +#' Only `defaults[[1]]$constants` is used; all other elements are silently ignored. +#' @param trait.values named list of trait values for each PFT. SIPNET can +#' handle only one vegetation PFT, and an optional "soil" PFT for soil constants. +#' `trait.values` that do not fit this format will throw a warning. #' @param settings PEcAn settings object #' @param run.id run ID #' @param inputs list of model inputs @@ -78,6 +80,22 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs = NULL, IC = NULL, restart = NULL, spinup = NULL) { + if (!is.list(trait.values)) { + PEcAn.logger::logger.severe( + "`trait.values` must be a list with names corresponding to PFTs." + ) + } + + if ( + (length(trait.values) > 2) || + (length(trait.values) == 2 && !("soil" %in% names(trait.values))) + ) { + PEcAn.logger::logger.warn(paste0( + "SIPNET can only handle one vegetation PFT and one optional soil PFT. ", + "Passing multiple vegetation PFTs may lead to unexpected behavior." 
+ )) + } + rev_raw <- settings$model$revision legacy_v1 <- c("102319", "136", "r136", "ssr", "git") if (is.null(rev_raw) || rev_raw %in% legacy_v1) { @@ -124,7 +142,7 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs } } PEcAn.logger::logger.info(paste0("Writing SIPNET configs with input ", template.clim)) - + # find out where to write run/ouput rundir <- file.path(settings$host$rundir, as.character(run.id)) outdir <- file.path(settings$host$outdir, as.character(run.id)) @@ -132,7 +150,7 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs rundir <- file.path(settings$rundir, as.character(run.id)) outdir <- file.path(settings$modeloutdir, as.character(run.id)) } - + # create launch script (which will create symlink) if (!is.null(settings$model$jobtemplate) && file.exists(settings$model$jobtemplate)) { jobsh <- readLines(con = settings$model$jobtemplate, n = -1) diff --git a/models/sipnet/R/write.events.SIPNET.R b/models/sipnet/R/write.events.SIPNET.R index 1afc0f43620..06477ebadcc 100644 --- a/models/sipnet/R/write.events.SIPNET.R +++ b/models/sipnet/R/write.events.SIPNET.R @@ -77,12 +77,16 @@ write.events.SIPNET <- function(events_json, outdir) { dates <- as.Date(vapply(evs, function(e) as.character(e$date), character(1))) years <- as.integer(format(dates, "%Y")) days <- as.integer(format(dates, "%j")) + # Sort everything first; then just loop normally ord <- order(dates) - lines <- character(length(ord)) - for (i in ord) { - e <- evs[[i]] - year <- years[[i]] - day <- days[[i]] + evs_sorted <- evs[ord] + days_sorted <- days[ord] + years_sorted <- years[ord] + lines <- character(length(evs)) + for (i in seq_along(evs)) { + e <- evs_sorted[[i]] + year <- years_sorted[[i]] + day <- days_sorted[[i]] type <- e$event_type if (type == "tillage") { f <- if (is.null(e$tillage_eff_0to1)) 0 else e$tillage_eff_0to1 diff --git a/models/sipnet/man/combine_sipnet_out.Rd b/models/sipnet/man/combine_sipnet_out.Rd 
new file mode 100644 index 00000000000..85358b569ff --- /dev/null +++ b/models/sipnet/man/combine_sipnet_out.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_sipnet_out.R +\name{combine_sipnet_out} +\alias{combine_sipnet_out} +\title{Combine a bunch of \code{sipnet.out} files into a single file} +\usage{ +combine_sipnet_out(directory, outfile, files = NULL) +} +\arguments{ +\item{directory}{Parent directory to search for \code{sipnet.out} files. You must +provide either this or an explicit vector of \code{files}.} + +\item{outfile}{File to which combined \code{sipnet.out} will be written} + +\item{files}{Optional vector of paths to files to combine. All must be +readable with \code{\link[=read_sipnet_out]{read_sipnet_out()}}. If \code{NULL} (default), looks for files +named \code{sipnet.out} in \code{directory} and its subdirectories, recursively.} +} +\value{ +\code{outfile} (path to output file), invisibly +} +\description{ +Combine a bunch of \code{sipnet.out} files into a single file +} diff --git a/models/sipnet/man/read_sipnet_out.Rd b/models/sipnet/man/read_sipnet_out.Rd new file mode 100644 index 00000000000..be533af4c89 --- /dev/null +++ b/models/sipnet/man/read_sipnet_out.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_sipnet_out.R +\name{read_sipnet_out} +\alias{read_sipnet_out} +\title{Read \code{sipnet.out} file to \code{data.frame}} +\usage{ +read_sipnet_out(sipnet_out_file) +} +\arguments{ +\item{sipnet_out_file}{Path to \code{sipnet.out} file} +} +\value{ +\code{data.frame} of SIPNET output +} +\description{ +Read \code{sipnet.out} file to \code{data.frame} +} diff --git a/models/sipnet/man/split_inputs.SIPNET.Rd b/models/sipnet/man/split_inputs.SIPNET.Rd index 96fc565c94d..cef939824b3 100644 --- a/models/sipnet/man/split_inputs.SIPNET.Rd +++ b/models/sipnet/man/split_inputs.SIPNET.Rd @@ -2,7 +2,7 @@ % Please edit documentation in 
R/split_inputs.SIPNET.R \name{split_inputs.SIPNET} \alias{split_inputs.SIPNET} -\title{split_inputs.SIPNET} +\title{Split SIPNET inputs into multiple files based on start and end time} \usage{ split_inputs.SIPNET( start.time, @@ -13,22 +13,30 @@ split_inputs.SIPNET( ) } \arguments{ -\item{start.time}{start date and time for each SDA ensemble} +\item{start.time}{Start date or datetime for splitting} -\item{stop.time}{stop date and time for each SDA ensemble} +\item{stop.time}{End date or datetime for splitting} -\item{inputs}{list of model inputs to use in write.configs.SIPNET} +\item{inputs}{Named \code{inputs} list as provided by PEcAn \code{settings}. Must have +structure like: +\code{list(met = list(path = "path/to/sipnet.clim"), events = list(path = "path/to/events.in"), ...)}} -\item{overwrite}{Default FALSE} +\item{overwrite}{if \code{TRUE}, overwrite existing target file (Default \code{FALSE})} -\item{outpath}{if specified, write output to a new directory. Default NULL writes back to the directory being read} +\item{outpath}{if specified, write output to a new directory. Default \code{NULL} writes back to the directory being read} } \value{ -file split up climate file +Modified \code{inputs} list with all \code{path} entries replaced with new +paths (suitable for inserting back into \code{settings$run$inputs}). } \description{ -Splits climate met for SIPNET +Subset each SIPNET input file and write a new file containing values \verb{>= start.time} and \verb{< end.time} (note: end time is \emph{not} inclusive) +} +\details{ +NOTE that sipnet met files contain dates \emph{and} times, while sipnet event +files contain only dates. Comparing a datetime to a date will coerce the +date to midnight UTC. 
} \author{ -Mike Dietze and Ann Raiho +Alexey Shiklomanov } diff --git a/models/sipnet/man/split_sipnet_events.Rd b/models/sipnet/man/split_sipnet_events.Rd new file mode 100644 index 00000000000..2ebdba2091c --- /dev/null +++ b/models/sipnet/man/split_sipnet_events.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/split_inputs.SIPNET.R +\name{split_sipnet_events} +\alias{split_sipnet_events} +\title{Split sipnet \code{events.in} files according to start and stop date} +\usage{ +split_sipnet_events( + start.time, + stop.time, + eventfile, + overwrite = FALSE, + outpath = NULL +) +} +\arguments{ +\item{start.time}{Start date or datetime for splitting} + +\item{stop.time}{End date or datetime for splitting} + +\item{eventfile}{Path to \code{events.in} file.} + +\item{overwrite}{if \code{TRUE}, overwrite existing target file (Default \code{FALSE})} + +\item{outpath}{if specified, write output to a new directory. Default \code{NULL} writes back to the directory being read} +} +\description{ +Split sipnet \code{events.in} files according to start and stop date +} diff --git a/models/sipnet/man/split_sipnet_met.Rd b/models/sipnet/man/split_sipnet_met.Rd new file mode 100644 index 00000000000..daf39a5ed30 --- /dev/null +++ b/models/sipnet/man/split_sipnet_met.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/split_inputs.SIPNET.R +\name{split_sipnet_met} +\alias{split_sipnet_met} +\title{Extract subset of a sipnet clim file based on start and end time} +\usage{ +split_sipnet_met(start.time, stop.time, met, overwrite = FALSE, outpath = NULL) +} +\arguments{ +\item{start.time}{start date and time at which to start extraction} + +\item{stop.time}{stop date and time at which to stop extraction} + +\item{met}{path to sipnet clim file to be split} + +\item{overwrite}{if \code{TRUE}, overwrite existing target file (Default \code{FALSE})} + +\item{outpath}{if specified, write 
output to a new directory. Default \code{NULL} writes back to the directory being read} +} +\value{ +path to split up climate file +} +\description{ +Extract subset of a sipnet clim file based on start and end time +} +\author{ +Mike Dietze, Ann Raiho, Alexey Shiklomanov +} diff --git a/models/sipnet/man/write.config.SIPNET.Rd b/models/sipnet/man/write.config.SIPNET.Rd index b241757e2b6..37ef098e459 100644 --- a/models/sipnet/man/write.config.SIPNET.Rd +++ b/models/sipnet/man/write.config.SIPNET.Rd @@ -20,7 +20,9 @@ write.config.SIPNET( structure is \verb{list(list(constants = list(trait1 = , trait2 = , ...)))}. Only \code{defaults[[1]]$constants} is used; all other elements are silently ignored.} -\item{trait.values}{vector of samples for a given trait} +\item{trait.values}{named list of trait values for each PFT. SIPNET can +handle only one vegetation PFT, and an optional "soil" PFT for soil constants. +\code{trait.values} that do not fit this format will throw a warning.} \item{settings}{PEcAn settings object} diff --git a/models/sipnet/tests/testthat/data/events-39011.in b/models/sipnet/tests/testthat/data/events-39011.in new file mode 100644 index 00000000000..badfb845b58 --- /dev/null +++ b/models/sipnet/tests/testthat/data/events-39011.in @@ -0,0 +1,123 @@ +2016 161 irrig 9.65252 0 +2016 205 irrig 9.64896 0 +2016 256 irrig 9.58199 0 +2017 163 irrig 9.53059 0 +2017 205 irrig 9.55854 0 +2017 256 irrig 9.63501 0 +2018 159 irrig 9.6026 0 +2018 164 plant 17.7 5.31 3.54 8.85 +2018 164 irrig 55.26454 0 +2018 243 irrig 46.66387 0 +2018 304 harv 0.85 0 0.15 1 +2019 159 plant 32.4 9.72 6.48 16.2 +2019 165 irrig 6.32198 0 +2019 172 irrig 6.12285 0 +2019 179 irrig 6.02169 0 +2019 186 irrig 6.15909 0 +2019 193 irrig 6.18148 0 +2019 200 irrig 6.43141 0 +2019 207 irrig 6.28203 0 +2019 214 irrig 6.25072 0 +2019 221 irrig 5.97653 0 +2019 228 irrig 5.91664 0 +2019 236 irrig 6.41118 0 +2019 244 irrig 6.37572 0 +2019 253 irrig 6.11432 0 +2019 264 irrig 6.04351 0 +2019 277 irrig 
6.03546 0 +2019 288 harv 0.85 0 0.15 1 +2019 292 irrig 5.91671 0 +2019 306 irrig 5.95993 0 +2019 324 irrig 5.96982 0 +2020 7 irrig 5.74717 0 +2020 32 irrig 5.85457 0 +2020 47 irrig 5.69725 0 +2020 59 irrig 5.9169 0 +2020 80 irrig 6.06453 0 +2020 93 irrig 6.16762 0 +2020 108 irrig 5.9422 0 +2020 116 irrig 5.71009 0 +2020 123 irrig 5.85222 0 +2020 130 irrig 5.90922 0 +2020 138 irrig 6.23485 0 +2020 146 irrig 6.19786 0 +2020 151 plant 28.4 8.52 5.68 14.2 +2020 153 irrig 5.93207 0 +2020 160 irrig 6.19921 0 +2020 167 irrig 5.84346 0 +2020 174 irrig 6.35657 0 +2020 181 irrig 6.36321 0 +2020 188 irrig 6.20552 0 +2020 195 irrig 6.38482 0 +2020 202 irrig 5.93723 0 +2020 209 irrig 5.86775 0 +2020 216 irrig 6.36583 0 +2020 223 irrig 5.72889 0 +2020 230 irrig 5.83106 0 +2020 238 irrig 6.01655 0 +2020 246 irrig 5.75161 0 +2020 256 irrig 6.17723 0 +2020 268 irrig 5.92472 0 +2020 282 irrig 5.93119 0 +2020 297 irrig 5.81343 0 +2020 299 harv 0.85 0 0.15 1 +2020 316 irrig 5.97592 0 +2020 336 irrig 5.85369 0 +2020 364 irrig 5.95409 0 +2021 17 irrig 5.73476 0 +2021 56 irrig 6.14175 0 +2021 69 irrig 5.97068 0 +2021 86 irrig 6.08402 0 +2021 95 irrig 5.72843 0 +2021 104 irrig 6.01936 0 +2021 112 irrig 6.07443 0 +2021 120 irrig 5.83634 0 +2021 127 irrig 6.4189 0 +2021 133 irrig 5.72954 0 +2021 140 irrig 5.94605 0 +2021 147 irrig 6.13868 0 +2021 153 plant 29.8 8.94 5.96 14.9 +2021 154 irrig 6.48331 0 +2021 161 irrig 5.98042 0 +2021 168 irrig 6.44376 0 +2021 174 irrig 5.69915 0 +2021 181 irrig 6.42743 0 +2021 188 irrig 6.45644 0 +2021 194 irrig 5.82907 0 +2021 201 irrig 6.3062 0 +2021 208 irrig 6.45553 0 +2021 215 irrig 6.4415 0 +2021 222 irrig 6.04692 0 +2021 229 irrig 5.97619 0 +2021 237 irrig 5.85328 0 +2021 245 irrig 6.13553 0 +2021 253 irrig 5.71006 0 +2021 263 irrig 5.74335 0 +2021 275 irrig 5.8088 0 +2021 286 harv 0.85 0 0.15 1 +2021 289 irrig 5.82425 0 +2021 320 irrig 5.82727 0 +2021 360 irrig 5.6906 0 +2022 19 irrig 5.96617 0 +2022 27 plant 4.3 1.29 0.86 2.15 +2022 177 harv 0.85 0 
0.15 1 +2022 194 plant 13.6 4.08 2.72 6.8 +2022 240 irrig 39.32469 0 +2022 267 harv 0.85 0 0.15 1 +2023 162 plant 31.8 9.54 6.36 15.9 +2023 166 irrig 6.38402 0 +2023 173 irrig 5.78955 0 +2023 181 irrig 6.49648 0 +2023 188 irrig 6.47614 0 +2023 195 irrig 6.10943 0 +2023 202 irrig 6.24171 0 +2023 209 irrig 6.21757 0 +2023 216 irrig 5.91485 0 +2023 224 irrig 6.5025 0 +2023 232 irrig 6.26688 0 +2023 240 irrig 6.02302 0 +2023 249 irrig 5.83862 0 +2023 259 irrig 6.00448 0 +2023 271 irrig 5.7135 0 +2023 285 irrig 5.93296 0 +2023 286 harv 0.85 0 0.15 1 diff --git a/models/sipnet/tests/testthat/test-split_inputs.SIPNET.R b/models/sipnet/tests/testthat/test-split_inputs.SIPNET.R index 2abcee8c253..db09a03bfa8 100644 --- a/models/sipnet/tests/testthat/test-split_inputs.SIPNET.R +++ b/models/sipnet/tests/testthat/test-split_inputs.SIPNET.R @@ -9,15 +9,16 @@ test_that("split_inputs", { by = "2 years" ) - clim_split <- mapply( + clim_split_list <- mapply( split_inputs.SIPNET, start.time = dates, stop.time = c(dates[-1], as.Date("2006-01-01")), # Stop just _before_ these dates MoreArgs = list( - inputs = climfile, + inputs = list(met = list(path = climfile)), outpath = outdir ) ) + clim_split <- vapply(clim_split_list, `[[`, character(1), "path") # all steps processed expect_length(clim_split, 4) @@ -30,21 +31,65 @@ test_that("split_inputs", { read.table(climfile), clim_split |> lapply(read.table) |> - do.call(what = "rbind") + c(make.row.names = FALSE) |> + do.call(what = rbind) ) }) -test_that("v2 clim format", { +test_that("split_inputs insists on start/end time being dates", { outdir <- withr::local_tempdir() climfile <- file.path("data", "niwot_1999_v2.clim") - clim_split <- split_inputs.SIPNET( + expect_error(split_inputs.SIPNET( start.time = "1999-01-01", stop.time = "1999-04-01", - inputs = climfile, + inputs = list(met = list(path = climfile)), + outpath = outdir + ), "Invalid start.time") +}) + +test_that("v2 clim format", { + outdir <- withr::local_tempdir() + climfile 
<- file.path("data", "niwot_1999_v2.clim") + + clim_split_list <- split_inputs.SIPNET( + start.time = as.Date("1999-01-01"), + stop.time = as.Date("1999-04-01"), + inputs = list(met = list(path = climfile)), outpath = outdir ) + clim_split <- vapply(clim_split_list, \(x) x$path, character(1)) expect_length(clim_split, 1) expect_length(readLines(clim_split), 90 * 2) # Jan-March 2x/day expect_equal(ncol(read.table(clim_split, nrows = 1)), 12) }) + +test_that("splitting event files", { + outdir <- withr::local_tempdir() + eventfile <- file.path("data", "events-39011.in") + dates <- seq( + from = as.Date("2016-01-01"), + to = as.Date("2024-01-01"), + by = "6 months" + ) + + inputs <- mapply( + split_inputs.SIPNET, + start.time = head(dates, -1), + stop.time = tail(dates, -1), + MoreArgs = list( + inputs = list(events = list(path = eventfile)), + outpath = outdir + ) + ) + + new_events <- vapply(inputs, `[[`, character(1), "path") + expect_true(length(new_events) == (length(dates) - 1)) + expect_true(all(file.exists(new_events))) + + original <- readLines(eventfile) + combined <- lapply(new_events, readLines) |> + do.call(what = c) |> + unname() + expect_equal(original, combined) +}) diff --git a/models/sipnet/tests/testthat/test-write.events.SIPNET.R b/models/sipnet/tests/testthat/test-write.events.SIPNET.R index f792a1fcc09..92a98433611 100644 --- a/models/sipnet/tests/testthat/test-write.events.SIPNET.R +++ b/models/sipnet/tests/testthat/test-write.events.SIPNET.R @@ -39,3 +39,35 @@ testthat::test_that("write.events.SIPNET handles multi-site events.json (one fil testthat::expect_true(startsWith(norm(got2[1]), "2022 60 plant")) testthat::expect_true(startsWith(norm(tail(got2, 1)), "2022 69 irrig")) }) + +testthat::test_that("events are sorted by date", { + ev_json1 <- system.file(file.path("events_fixtures", "events_site1.json"), + package = "PEcAn.data.land" + ) + outdir <- withr::local_tempdir() + events_orig <- jsonlite::read_json(ev_json1) + # Shuffle the events + 
n_events <- length(events_orig[[1]]$events) + withr::with_seed(8675309, { + idx <- sample(n_events) + }) + events_shuffled <- events_orig + events_shuffled[[1]]$events <- events_shuffled[[1]]$events[idx] + shuffled_file <- file.path(outdir, "events.json") + jsonlite::write_json(events_shuffled, shuffled_file, auto_unbox = TRUE) + files <- write.events.SIPNET(shuffled_file, outdir) + expect_length(files, 1) + got <- readLines(files[1]) + expected <- c( + "2022 35 till 0.2", + "2022 40 till 0.1", + "2022 40 irrig 5 1", + "2022 40 fert 0 0 10", + "2022 50 plant 10 3 2 5", + "2022 250 harv 0.1 0 0 0" + ) + # Three events in the middle have the same dates, so they won't sort + # reliably. Just check that the first and last two events are correct. + expect_equal(norm(head(got, 1)), norm(head(expected, 1))) + expect_equal(norm(tail(got, 2)), norm(tail(expected, 2))) +}) diff --git a/modules/data.land/.Rbuildignore b/modules/data.land/.Rbuildignore index edd7e377255..d8e715679ba 100644 --- a/modules/data.land/.Rbuildignore +++ b/modules/data.land/.Rbuildignore @@ -2,3 +2,4 @@ contrib data-raw ^docs$ .*venv/ +.*/sipnet-restart-workflow/ diff --git a/modules/data.land/NAMESPACE b/modules/data.land/NAMESPACE index c3214ba9e67..ba47162bfcd 100644 --- a/modules/data.land/NAMESPACE +++ b/modules/data.land/NAMESPACE @@ -18,6 +18,7 @@ export(download_package_rm) export(ens_veg_module) export(eto_to_etc) export(eto_to_etc_bism) +export(events_to_crop_cycle_starts) export(extract.stringCode) export(extract_FIA) export(extract_NEON_veg) diff --git a/modules/data.land/NEWS.md b/modules/data.land/NEWS.md index a12f22d6f87..3769fc71e8b 100644 --- a/modules/data.land/NEWS.md +++ b/modules/data.land/NEWS.md @@ -24,6 +24,7 @@ `neonstore`, `PEcAn.benchmark`, `PEcAn.visualization`, `rjags`, `sirt`, and `sp` are now suggested rather than required. They are only needed for specific optional functionality. (#3599) +* Updated `validate_events_json()` to use events schema v0.1.1 by default. 
The previous default v0.1.0 is still available by setting `schema_version="0.1.0"`. # PEcAn.data.land 1.9.0 diff --git a/modules/data.land/R/events_to_crop_cycle_starts.R b/modules/data.land/R/events_to_crop_cycle_starts.R new file mode 100644 index 00000000000..371a7f80599 --- /dev/null +++ b/modules/data.land/R/events_to_crop_cycle_starts.R @@ -0,0 +1,55 @@ +#' Extract the first planting date of each crop cycle +#' +#' Reads a (JSON) management events file and finds the planting events at which +#' the site changes from one crop to another, ignoring repeat plantings of +#' the same crop. +#' These are the dates when single-PFT models will need to restart to update +#' their crop parameterization. +#' +#' TODO: For now this function requires each planting event to specify a +#' `crop_code` attribute, but note that this is not enforced by v0.1 of the PEcAn +#' events schema. The schema instead allows each site object to specify a +#' site-level `PFT` attribute that is implied constant over time. +#' As I write this I think the schema may need to change to require a crop or +#' PFT identifier be specified for every planting event. +#' +#' @param event_json path to an `events.json` file +#' +#' @return data frame with columns `site_id`, `date`, `crop_code`, +#' with one row per detected crop cycle. +#' @export +#' @author Chris Black +#' +#' @examples +#' # Not currently runnable because file does not list crop in planting events. +#' # Revisit after deciding if schema update is warranted.
+#' \dontrun{ +#' evts <- system.file( +#' "events_fixtures/events_site1_site2.json", +#' package = "PEcAn.data.land" +#' ) +#' events_to_crop_cycle_starts(evts) +#' } +events_to_crop_cycle_starts <- function(event_json) { + jsonlite::read_json(event_json) |> + dplyr::bind_rows() |> + dplyr::mutate(events = purrr::map(.data$events, as.data.frame)) |> + tidyr::unnest("events") |> + dplyr::filter(.data$event_type %in% c("planting", "harvest")) |> + dplyr::mutate(date = as.Date(.data$date)) |> + dplyr::arrange(.data$date) |> + find_crop_changes() +} + +# helper for events_to_crop_cycle_starts, +# mostly to ease unit testing +find_crop_changes <- function(event_df) { + event_df |> + dplyr::filter(.data$event_type == "planting") |> + dplyr::arrange(.data$site_id, .data$date) |> + dplyr::mutate(crop_cycle_id = dplyr::consecutive_id(.data$site_id, .data$crop_code)) |> + dplyr::group_by(.data$site_id, .data$crop_cycle_id) |> + dplyr::slice_min(.data$date) |> + dplyr::ungroup() |> + dplyr::select("site_id", "date", "crop_code") +} diff --git a/modules/data.land/R/validate_events.R b/modules/data.land/R/validate_events.R index 721dd3ad35a..b96aa6ab31e 100644 --- a/modules/data.land/R/validate_events.R +++ b/modules/data.land/R/validate_events.R @@ -1,7 +1,7 @@ #' Validate PEcAn events JSON against schema v0.1.0 #' #' Validates a PEcAn events JSON file (single-site object or an array of site -#' objects) against the bundled JSON Schema (draft 2020-12) using the AJV +#' objects) against the bundled JSON Schema using the AJV #' engine. #' #' - Logs an error and returns FALSE if the JSON file does not exist or does @@ -10,6 +10,7 @@ #' not installed, so calling code can proceed without a hard dependency. #' #' @param events_json character. Path to the JSON file to validate. +#' @param schema_version character. Version of the PEcAn events schema to validate against. #' @param verbose logical. When `TRUE`, include detailed AJV messages on error. #' @param max_errs integer.
Print only this many validation errors. #' To see the rest, use the `errors` attribute of the return value. @@ -25,7 +26,7 @@ #' # package = "PEcAn.data.land")) #' #' @export -validate_events_json <- function(events_json, verbose = TRUE, max_errs = 50) { +validate_events_json <- function(events_json, schema_version = "0.1.1", verbose = TRUE, max_errs = 50) { if (!file.exists(events_json)) { PEcAn.logger::logger.error(glue::glue("events_json file does not exist: {events_json}")) return(FALSE) @@ -36,7 +37,11 @@ validate_events_json <- function(events_json, verbose = TRUE, max_errs = 50) { return(NA) } - schema <- system.file("events_schema_v0.1.0.json", package = "PEcAn.data.land", mustWork = TRUE) + schema <- system.file( + paste0("events_schema_v", schema_version, ".json"), + package = "PEcAn.data.land", + mustWork = TRUE + ) ok <- jsonvalidate::json_validate(events_json, schema = schema, engine = "ajv", verbose = verbose, error = FALSE) if (isTRUE(ok)) { PEcAn.logger::logger.info(glue::glue("events_json file is valid: {events_json}")) diff --git a/modules/data.land/inst/events_fixtures/events_site1.json b/modules/data.land/inst/events_fixtures/events_site1.json index 19605953a19..d2b583e4475 100644 --- a/modules/data.land/inst/events_fixtures/events_site1.json +++ b/modules/data.land/inst/events_fixtures/events_site1.json @@ -1,6 +1,6 @@ [ { - "pecan_events_version": "0.1.0", + "pecan_events_version": "0.1.1", "site_id": "EX1", "events": [ { @@ -29,11 +29,13 @@ { "event_type": "planting", "date": "2022-02-19", - "leaf_c_kg_m2": 0.01 + "leaf_c_kg_m2": 0.01, + "crop_code": "Mo17" }, { "event_type": "harvest", "date": "2022-09-07", + "crop_code": "Mo17", "frac_above_removed_0to1": 0.1, "frac_below_removed_0to1": 0.0, "frac_above_to_litter_0to1": 0.0, diff --git a/modules/data.land/inst/events_fixtures/events_site1_site2.json b/modules/data.land/inst/events_fixtures/events_site1_site2.json index 84a2ee195d6..fb2cc151249 100644 --- 
a/modules/data.land/inst/events_fixtures/events_site1_site2.json +++ b/modules/data.land/inst/events_fixtures/events_site1_site2.json @@ -1,8 +1,7 @@ [ { - "pecan_events_version": "0.1.0", + "pecan_events_version": "0.1.1", "site_id": "S1", - "pft": "PFT", "events": [ { "event_type": "tillage", @@ -12,6 +11,7 @@ { "event_type": "harvest", "date": "2022-09-01", + "crop_code": "wheat", "frac_above_removed_0to1": 0.2, "frac_below_removed_0to1": 0.0, "frac_above_to_litter_0to1": 0.0, @@ -20,14 +20,14 @@ ] }, { - "pecan_events_version": "0.1.0", + "pecan_events_version": "0.1.1", "site_id": "S2", - "pft": "PFT", "events": [ { "event_type": "planting", "date": "2022-03-01", - "leaf_c_kg_m2": 0.01 + "leaf_c_kg_m2": 0.01, + "crop_code": "maize" }, { "event_type": "irrigation", diff --git a/modules/data.land/inst/events_schema_v0.1.1.json b/modules/data.land/inst/events_schema_v0.1.1.json new file mode 100644 index 00000000000..14e5b8c7002 --- /dev/null +++ b/modules/data.land/inst/events_schema_v0.1.1.json @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://pecanproject.org/schema/events-mvp-0-1-1.json", + "oneOf": [ + { "$ref": "#/$defs/site" }, + { "type": "array", "items": { "$ref": "#/$defs/site" } } + ], + "$defs": { + "site": { + "type": "object", + "required": ["pecan_events_version", "site_id", "events"], + "properties": { + "pecan_events_version": { "type": "string", "const": "0.1.1" }, + "site_id": { "type": "string", "minLength": 1 }, + "ensemble_id": { "type": ["string", "null"], "minLength": 1 }, + "geometry_uri": { "type": ["string", "null"], "format": "uri" }, + "provenance": { "type": "object", "additionalProperties": true }, + "events": { + "type": "array", + "items": { + "type": "object", + "required": ["event_type", "date"], + "properties": { + "event_type": { + "type": "string", + "enum": ["planting", "harvest", "irrigation", "fertilization", "tillage"] + }, + "date": { "type": "string", "pattern": 
"^\\d{4}-\\d{2}-\\d{2}$" }, + "fraction_area": { "type": "number", "minimum": 0, "maximum": 1, "default": 1.0 }, + "source": { "type": "string" }, + + "leaf_c_kg_m2": { "type": "number", "minimum": 0 }, + "wood_c_kg_m2": { "type": "number", "minimum": 0 }, + "fine_root_c_kg_m2": { "type": "number", "minimum": 0 }, + "coarse_root_c_kg_m2": { "type": "number", "minimum": 0 }, + "cultivar": { "type": "string" }, + "crop_code": { "type": "string" }, + "crop_display": { "type": "string" }, + + "frac_above_removed_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + "frac_below_removed_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + "frac_above_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + "frac_below_to_litter_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + + "amount_mm": { "type": "number", "minimum": 0 }, + "method": { "type": "string", "enum": ["soil", "canopy", "flood"] }, + "immed_evap_frac_0to1": { "type": "number", "minimum": 0, "maximum": 1 }, + + "org_c_kg_m2": { "type": "number", "minimum": 0 }, + "org_n_kg_m2": { "type": "number", "minimum": 0 }, + "nh4_n_kg_m2": { "type": "number", "minimum": 0 }, + "no3_n_kg_m2": { "type": "number", "minimum": 0 }, + + "tillage_eff_0to1": { "type": "number", "minimum": 0 }, + "intensity_category": { "type": "string" }, + "depth_m": { "type": "number", "minimum": 0 } + }, + "allOf": [ + { "if": { "properties": { "event_type": { "const": "planting" } } }, + "then": { "required": ["crop_code", "leaf_c_kg_m2"] } }, + { "if": { "properties": { "event_type": { "const": "harvest" } } }, + "then": { "required": ["frac_above_removed_0to1"] } }, + { "if": { "properties": { "event_type": { "const": "irrigation" } } }, + "then": { "required": ["amount_mm", "method"] } }, + { "if": { "properties": { "event_type": { "const": "fertilization" } } }, + "then": { "anyOf": [ + { "required": ["org_c_kg_m2"] }, + { "required": ["nh4_n_kg_m2"] }, + { "required": ["no3_n_kg_m2"] } + ] } }, + { 
"if": { "properties": { "event_type": { "const": "tillage" } } }, + "then": { "required": ["tillage_eff_0to1"] } } + ], + "additionalProperties": true + } + } + }, + "additionalProperties": false + } + } +} diff --git a/modules/data.land/man/events_to_crop_cycle_starts.Rd b/modules/data.land/man/events_to_crop_cycle_starts.Rd new file mode 100644 index 00000000000..c3677bf0cf6 --- /dev/null +++ b/modules/data.land/man/events_to_crop_cycle_starts.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/events_to_crop_cycle_starts.R +\name{events_to_crop_cycle_starts} +\alias{events_to_crop_cycle_starts} +\title{Extract the first planting date of each crop cycle} +\usage{ +events_to_crop_cycle_starts(event_json) +} +\arguments{ +\item{event_json}{path to an `events.json` file} +} +\value{ +data frame with columns `site_id`, `date`, `crop`, + with one row per detected crop cycle. +} +\description{ +Reads a (JSON) management events file and finds the planting events at which +the site changes from from one crop to another, ignoring repeat plantings of +the same crop. +These are the dates when single-PFT models will need to restart to update +their crop parameterization. +} +\details{ +TODO: For now this function requires each planting event to specify a +`crop` attribute, but note that this is not enforced by v0.1 of the PEcAn +events schema. The schema instead allows each site object to specify a +site-level `PFT` attribute that is implied constant over time. +As I write this I think the schema may need to change to require a crop or +PFT identifier be specified for every planting event. +} +\examples{ +# Not currently runnable because file does not list crop in planting events. +# Revisit after deciding if schema update is warranted. 
+\dontrun{ +evts <- system.file( + "events_fixtures/events_site1_site2.json", + package = "PEcAn.data.land" +) +events_to_crop_cycle_starts(evts) +} +} +\author{ +Chris Black +} diff --git a/modules/data.land/man/validate_events_json.Rd b/modules/data.land/man/validate_events_json.Rd index c0a285d6b3d..18c16706f1a 100644 --- a/modules/data.land/man/validate_events_json.Rd +++ b/modules/data.land/man/validate_events_json.Rd @@ -4,11 +4,18 @@ \alias{validate_events_json} \title{Validate PEcAn events JSON against schema v0.1.0} \usage{ -validate_events_json(events_json, verbose = TRUE, max_errs = 50) +validate_events_json( + events_json, + schema_version = "0.1.1", + verbose = TRUE, + max_errs = 50 +) } \arguments{ \item{events_json}{character. Path to the JSON file to validate.} +\item{schema_version}{character. Version of the PEcAn events schema to validate against.} + \item{verbose}{logical. When `TRUE`, include detailed AJV messages on error.} \item{max_errs}{integer. Print only this many validation errors. @@ -21,7 +28,7 @@ NA if validator unavailable. } \description{ Validates a PEcAn events JSON file (single-site object or an array of site -objects) against the bundled JSON Schema (draft 2020-12) using the AJV +objects) against the bundled JSON Schema using the AJV engine. 
} \details{ diff --git a/modules/data.land/tests/testthat/test-events_to_crop_cycle_starts.R b/modules/data.land/tests/testthat/test-events_to_crop_cycle_starts.R new file mode 100644 index 00000000000..4b4fe97f6a5 --- /dev/null +++ b/modules/data.land/tests/testthat/test-events_to_crop_cycle_starts.R @@ -0,0 +1,49 @@ +test_that("non-planting events are ignored", { + dat <- dplyr::tribble( + ~site_id, ~date, ~event_type, ~crop_code, + "a", "2016-01-01", "planting", "almond", + "a", "2016-05-01", "irrigation", NA_character_, + "a", "2017-01-01", "planting", "almond", + "a", "2017-05-15", "fertilization", NA_character_, + ) + res <- find_crop_changes(dat) + expect_equal(nrow(res), 1) + expect_equal(res$date, "2016-01-01") + expect_equal(res, find_crop_changes(dat[-c(2, 4), ])) +}) + +test_that("nonconsecutive runs of the same crop counted separately", { + dat <- dplyr::tribble( + ~site_id, ~date, ~event_type, ~crop_code, + "b", "2016-03-01", "planting", "tomato", + "b", "2017-03-05", "planting", "tomato", + "b", "2018-04-15", "planting", "potato", + "b", "2018-08-01", "planting", "tomato", + ) + res <- find_crop_changes(dat) + expect_equal(nrow(res), 3) + expect_equal(res$date, dat$date[c(1, 3, 4)]) +}) + +test_that("sites are counted separately", { + dat <- dplyr::tribble( + ~site_id, ~date, ~event_type, ~crop_code, + "a", "2016-03-01", "planting", "grape", + "b", "2016-03-01", "planting", "grape", + "c", "2023-03-01", "planting", "grape", + ) + res <- find_crop_changes(dat) + expect_equal(nrow(res), 3) + expect_equal(res$date, dat$date) + expect_equal(res$site_id, dat$site_id) +}) + +test_that("reads from JSON", { + path <- system.file( + "events_fixtures/events_site1.json", + package = "PEcAn.data.land" + ) + res <- events_to_crop_cycle_starts(path) + expect_equal(res$date, as.Date("2022-02-19")) + expect_equal(res$crop_code, "Mo17") +}) diff --git a/modules/uncertainty/R/generate_joint_ensemble_design.R b/modules/uncertainty/R/generate_joint_ensemble_design.R 
index c94b0525127..470a9688033 100644 --- a/modules/uncertainty/R/generate_joint_ensemble_design.R +++ b/modules/uncertainty/R/generate_joint_ensemble_design.R @@ -119,4 +119,4 @@ generate_joint_ensemble_design <- function(settings, # list includes additional info beyond just X that's required by the function that # does the sobol index calculations, but not required to do the runs themselves. return(list(X = design_matrix)) -} \ No newline at end of file +} diff --git a/workflows/sipnet-restart-workflow/01-prepare-events.R b/workflows/sipnet-restart-workflow/01-prepare-events.R new file mode 100644 index 00000000000..7992c51224d --- /dev/null +++ b/workflows/sipnet-restart-workflow/01-prepare-events.R @@ -0,0 +1,172 @@ +#!/usr/bin/env Rscript + +config <- config::get(file = "workflows/sipnet-restart-workflow/config.yml") + +# Pick a parcel from irrigation +pid <- config[["parcel_id"]] +site_id <- config[["site_id"]] + +# Find the closest design point to that parcel to use existing met +if (is.null(site_id)) { + parcel_path <- config[["parcel_path"]] + parcel <- sf::read_sf( + parcel_path, + query = glue::glue("SELECT * FROM parcels WHERE parcel_id = {pid}") + ) + + dp_path <- config[["dp_path"]] + design_points <- read.csv(dp_path) |> + sf::st_as_sf(coords = c("lon", "lat"), crs = 4326) |> + sf::st_transform(sf::st_crs(parcel)) + dp_idx <- sf::st_nearest_feature(parcel, design_points) + site_id <- design_points[dp_idx, ][["id"]] +} + +# Make the events.json +planting <- list.files( + config[["planting_events_dir"]], + "planting_statewide_.*\\.parquet", + full.names = TRUE +) |> + arrow::open_dataset() |> + dplyr::collect() |> + dplyr::filter(.data$site_id == as.character(.env$pid)) |> + dplyr::mutate(date = as.Date(.data$date)) |> + # Start no earlier than 2016 because our met doesn't go back before 2015 + dplyr::filter(date >= as.Date("2016-01-01")) |> + tibble::as_tibble() + +# Start date is the first planting (after 2016) +start_date <- min(planting$date) + 
+planting_events <- planting |> + dplyr::select( + "site_id", "event_type", "date", + "crop_code" = "code", + "leaf_c_kg_m2" = "C_LEAF", + "wood_c_kg_m2" = "C_STEM", + "fine_root_c_kg_m2" = "C_FINEROOT", + "coarse_root_c_kg_m2" = "C_COARSEROOT", + "leaf_n_kg_m2" = "N_LEAF", + "wood_n_kg_m2" = "N_STEM", + "fine_root_n_kg_m2" = "N_FINEROOT", + "coarse_root_n_kg_m2" = "N_COARSEROOT" + ) + +# Phenology is not currently used... +# mslsp_path <- config[["mslsp_path"]] +# phenology <- fs::dir_ls(mslsp_path, glob = "*.parquet") |> +# arrow::open_dataset() |> +# dplyr::filter(.data$parcel_id == .env$pid, !is.na(.data$mslsp_cycle)) |> +# dplyr::collect() |> +# tibble::as_tibble() |> +# dplyr::arrange(.data$year, .data$mslsp_cycle) |> +# dplyr::relocate( +# "year", "mslsp_cycle", dplyr::starts_with("landiq_"), +# ) + +harvest_dir <- config[["harvest_events_dir"]] +pidc <- as.character(pid) +harvest_events <- list.files(harvest_dir, "*.parquet", full.names = TRUE) |> + arrow::open_dataset() |> + dplyr::filter( + .data$site_id == .env$pidc, + .data$date >= .env$start_date + ) |> + dplyr::collect() |> + tibble::as_tibble() |> + dplyr::select( + "site_id", "event_type", "date", dplyr::starts_with("frac_") + ) + +# End with the final harvest +end_date <- max(harvest_events$date) + +irrigation_path <- config[["irrigation_path"]] + +irrigation_events_raw <- arrow::open_dataset(irrigation_path) |> + dplyr::filter(.data$parcel_id == .env$pid) |> + dplyr::collect() |> + tibble::as_tibble() + +# Irrigation events include uncertainty ensembles, so we process them +# accordingly. 
+irrigation_events_all <- irrigation_events_raw |> + dplyr::filter( + .data$date >= .env$start_date, + .data$date <= .env$end_date + ) |> + dplyr::mutate( + event_type = "irrigation", + site_id = as.character(.data$parcel_id) + ) |> + dplyr::select(-c("parcel_id")) |> + dplyr::relocate("site_id", "event_type", "date") + +irrigation_events_list <- split( + dplyr::select(irrigation_events_all, -"ens_id"), + irrigation_events_all[["ens_id"]] +) + +make_event_list <- function(df) { + df2list <- function(df) { + as.list(df) |> purrr::list_transpose() + } + df |> + tidyr::nest(.by = "site_id", .key = "events") |> + dplyr::mutate(events = purrr::map(.data$events, df2list)) +} + +planting_n <- make_event_list(planting_events) +harvest_n <- make_event_list(harvest_events) +irrigation_n_list <- purrr::map(irrigation_events_list, make_event_list) + +make_all_events <- function(...) { + dplyr::bind_rows(...) |> + dplyr::summarize(events = list(purrr::list_c(.data$events)), .by = "site_id") |> + dplyr::mutate(pecan_events_version = "0.1.1", .before = "site_id") +} + +# NOTE: This only works if each event type has either 1 ensemble or the same +# number of ensembles. For varying names of ensembles, we need a more +# sophisticated strategy.
+pmap_args <- c( + if (length(planting_n) > 0) list(list(planting_n)), + if (length(harvest_n) > 0) list(list(harvest_n)), + if (length(irrigation_n_list) > 0) irrigation_n_list +) +all_events_list <- purrr::pmap(pmap_args, make_all_events) + +outdir_root <- config[["outdir_root"]] +events_dir <- file.path(outdir_root, "events") +unlink(events_dir, recursive = TRUE) +dir.create(events_dir, showWarnings = FALSE, recursive = TRUE) + +names(all_events_list) <- file.path( + events_dir, paste0("event_", seq_along(all_events_list), ".json") +) +if (length(irrigation_events_list) > 0) { + names(all_events_list) <- file.path( + events_dir, + paste0(gsub("^irr_", "event_", names(irrigation_events_list)), ".json") + ) +} + +purrr::iwalk( + all_events_list, + jsonlite::write_json, + pretty = TRUE, + auto_unbox = TRUE +) + +# NOTE: Right now, `write.events.SIPNET` has no way to customize the filename, +# only the output directory. So we have to create a bunch of individual +# directories here, with each one containing one SIPNET event file (but +# possibly for multiple sites). 
+sipnet_event_dirs <- gsub("\\.json$", ".sipnet", names(all_events_list)) + +purrr::walk2( + names(all_events_list), + sipnet_event_dirs, + PEcAn.SIPNET::write.events.SIPNET +) diff --git a/workflows/sipnet-restart-workflow/02-prepare-settings.R b/workflows/sipnet-restart-workflow/02-prepare-settings.R new file mode 100644 index 00000000000..9e46fb23b62 --- /dev/null +++ b/workflows/sipnet-restart-workflow/02-prepare-settings.R @@ -0,0 +1,145 @@ +#!/usr/bin/env Rscript + +config <- config::get(file = "workflows/sipnet-restart-workflow/config.yml") +outdir_root <- config[["outdir_root"]] + +outdir <- normalizePath(file.path(outdir_root, "output")) +modeloutdir <- file.path(outdir, "out") +dir.create(modeloutdir, showWarnings = FALSE, recursive = TRUE) +settings_rundir <- file.path(outdir, "run") +dir.create(settings_rundir, showWarnings = FALSE, recursive = TRUE) + +binary <- config[["sipnet_binary"]] +stopifnot(file.exists(binary)) + +n_ensemble <- config[["n_ensemble"]] + +site_id <- config[["site_id"]] + +dp_data <- read.csv(config[["dp_path"]]) |> + dplyr::filter(.data$id == .env$site_id) +site_lat <- dp_data[["lat"]] +site_lon <- dp_data[["lon"]] + +events_dir <- file.path(outdir_root, "events") +events_files <- list.files(events_dir, recursive = TRUE, full.names = TRUE) +events_json_files <- as.list(grep(".*\\.json$", events_files, value = TRUE)) +names(events_json_files) <- paste0("path", seq_along(events_json_files)) +sipnet_eventfiles <- as.list(grep(".*\\.sipnet/events-.*\\.in", events_files, value = TRUE)) +names(sipnet_eventfiles) <- paste0("path", seq_along(sipnet_eventfiles)) + +# NOTE: We start from the first planting (after 2016) and end at the last harvest +events <- jsonlite::read_json(events_json_files[[1]], simplifyVector = FALSE) + +planting_dates <- events |> + purrr::chuck(1, "events") |> + purrr::keep(\(x) x$event_type == "planting") |> + purrr::map_chr("date") |> + as.Date() + +harvest_dates <- events |> + purrr::pluck(1, "events") |> + 
purrr::keep(\(x) x$event_type == "harvest") |> + purrr::map_chr("date") |> + as.Date() + +# Start Jan 1 of the year of the first planting +start_date <- lubridate::make_date(lubridate::year(min(planting_dates)), 1, 1) + +# End Dec 31 of the year of the last harvest +end_date <- lubridate::make_date(lubridate::year(max(harvest_dates)), 12, 31) + +met_dir <- sprintf( + "%sN_%sW", + format(round(abs(site_lat) * 2) / 2, nsmall = 0, drop0trailing = TRUE), + format(round(abs(site_lon) * 2) / 2, nsmall = 0, drop0trailing = TRUE) +) +metfiles <- file.path(config[["met_dir"]], met_dir) |> + list.files("ERA5\\..*\\.clim", full.names = TRUE) |> + as.list() +stopifnot(length(metfiles) > 0) +names(metfiles) <- paste0("path", seq_along(metfiles)) + +icfiles <- file.path(config[["ic_dir"]], site_id) |> + list.files("IC_site_.*\\.nc", full.names = TRUE) |> + as.list() +names(icfiles) <- paste0("path", seq_along(icfiles)) + +pft_dir <- config[["pft_dir"]] +stopifnot(dir.exists(pft_dir)) + +pfts <- c("temperate.deciduous", "grass", "annual_crop") |> + purrr::map(~list( + name = .x, + posterior.files = file.path(pft_dir, .x, "post.distns.Rdata"), + outdir = paste0(file.path(pft_dir, .x), "/") + )) |> + c(list(list(name = "soil", outdir = file.path(pft_dir, "soil/")))) +names(pfts) <- rep("pft", length(pfts)) + +ensemble_settings <- list( + size = n_ensemble, + variable = "LAI", + variable = "SoilMoist", + variable = "GPP", + variable = "SoilResp", + variable = "AGB", + variable = "NEE", + samplingspace = list( + parameters = list(method = "uniform"), + events = list(method = "sampling"), + met = list(method = "sampling"), + poolinitcond = list(method = "sampling") + ), + start.year = lubridate::year(start_date), + end.year = lubridate::year(end_date) +) + +settings_raw <- PEcAn.settings::as.Settings(list( + outdir = outdir, + modeloutdir = modeloutdir, + rundir = settings_rundir, + pfts = pfts, + ensemble = ensemble_settings, + model = list( + type = "SIPNET", + binary = binary, + 
revision = "v2", + options = list( + GDD = 0, + NITROGEN_CYCLE = 0, + ANAEROBIC = 0, + LITTER_POOL = 1 + ) + ), + run = list( + site = list( + id = site_id, + name = site_id, + lat = site_lat, + lon = site_lon, + site.pft = list( + soil = "soil" + ) + ), + start.date = start_date, + end.date = end_date, + inputs = list( + met = list(path = metfiles), + poolinitcond = list(path = icfiles), + events = list( + path = sipnet_eventfiles, + source = events_json_files + ) + ) + ), + host = list( + name = "localhost" + ) +)) + +# Delete existing outdir so we always start fresh +unlink(settings_raw$outdir, recursive = TRUE) +dir.create(settings_raw$outdir, recursive = TRUE, showWarnings = FALSE) + +PEcAn.settings::write.settings(settings_raw, "settings.xml", outdir_root) diff --git a/workflows/sipnet-restart-workflow/03-run-sipnet.R b/workflows/sipnet-restart-workflow/03-run-sipnet.R new file mode 100644 index 00000000000..534ad4512f2 --- /dev/null +++ b/workflows/sipnet-restart-workflow/03-run-sipnet.R @@ -0,0 +1,26 @@ +#!/usr/bin/env Rscript + +if (FALSE) { + devtools::install("models/sipnet", upgrade = FALSE) + devtools::install("modules/data.land", upgrade = FALSE) +} + +config <- config::get(file = "workflows/sipnet-restart-workflow/config.yml") + +settings_raw <- PEcAn.settings::read.settings(file.path(config[["outdir_root"]], "settings.xml")) + +sens_design <- PEcAn.uncertainty::generate_joint_ensemble_design( + settings_raw, + settings_raw$ensemble$size +) +write.csv(sens_design$X, file.path(settings_raw$outdir, "input_design.csv")) + +settings <- PEcAn.workflow::runModule.run.write.configs( + settings_raw, + input_design = sens_design$X +) + +source("workflows/sipnet-restart-workflow/utils.R") +jobfiles <- write_segmented_configs.SIPNET(settings, sens_design$X) + +PEcAn.workflow::runModule_start_model_runs(settings) diff --git a/workflows/sipnet-restart-workflow/04-plot-outputs.R b/workflows/sipnet-restart-workflow/04-plot-outputs.R new file mode 100644 index 
00000000000..8e0268de184 --- /dev/null +++ b/workflows/sipnet-restart-workflow/04-plot-outputs.R @@ -0,0 +1,82 @@ +#!/usr/bin/env Rscript + +library(ggplot2) + +config <- config::get(file = "workflows/sipnet-restart-workflow/config.yml") + +outdir_root <- config[["outdir_root"]] + +settings <- PEcAn.settings::read.settings(file.path(outdir_root, "settings.xml")) +events_json_file <- settings |> + purrr::chuck("run", "inputs", "events", "source", "path1") + +events <- jsonlite::read_json(events_json_file, simplifyVector = FALSE) + +events_df <- dplyr::bind_rows(events[[1]][["events"]]) |> + dplyr::mutate(date = as.Date(.data$date)) |> + dplyr::filter(.data$event_type != "irrigation") |> + dplyr::arrange(.data$date) + +modeloutdir <- file.path(config$outdir_root, "output", "out") +runids <- list.files(modeloutdir) + +vars_some <- c("NEE", "LAI", "AGB", "TotSoilCarb", "leaf_carbon_content", "litter_carbon_content") +vars_more <- c( + "GPP", "NPP", "TotalResp", "AutoResp", "HeteroResp", "SoilResp", "NEE", "AbvGrndWood", + "leaf_carbon_content", "TotLivBiom", "TotSoilCarb", "Qle", "Transp", "SoilMoist", "SoilMoistFrac", + "litter_carbon_content", "LAI", "fine_root_carbon_content", "coarse_root_carbon_content", + "AGB", "GWBI" + # "mineral_N", "soil_organic_N", "litter_N", "N2O_flux", "N_leaching", "N_fixation", "N_uptake", "CH4_flux", "SWE" +) + +read_output <- function(modeloutdir, runid, variables = vars_some) { + PEcAn.utils::read.output( + runid, + file.path(modeloutdir, runid), + variables = variables, + dataframe = TRUE + ) |> + dplyr::mutate(run_id = .env$runid) |> + dplyr::as_tibble() +} + +results <- purrr::map(runids, read_output, modeloutdir = modeloutdir, variables = vars_more) |> + dplyr::bind_rows() +plt <- results |> + tidyr::pivot_longer( + -c("posix", "year", "run_id"), + names_to = "variable", + values_to = "value" + ) |> + ggplot() + + aes(x = posix, y = value, color = run_id) + + geom_line() + + geom_vline( + aes(xintercept = date, linetype = 
event_type), + data = events_df + ) + + scale_linetype_manual(values = c("planting" = "dotted", "harvest" = "dashed")) + + facet_wrap(vars(variable), scales = "free") + + theme_bw() + + theme(legend.position = "bottom") +if (interactive()) plt + +ggsave( + file.path(outdir_root, "restarts.png"), + plt, + width = 18.5, + height = 13, + units = "in" +) + +# zoomed <- plt + +# xlim( +# lubridate::ymd_h("2017-10-01T00"), +# lubridate::ymd_h("2017-10-17T01") +# ) +# zoomed +# ggsave( +# file.path(outdir_root, "zoomed.png"), +# zoomed, +# width = 14.5, height = 9, units = "in" +# ) diff --git a/workflows/sipnet-restart-workflow/README.md b/workflows/sipnet-restart-workflow/README.md new file mode 100644 index 00000000000..13df30fac36 --- /dev/null +++ b/workflows/sipnet-restart-workflow/README.md @@ -0,0 +1,19 @@ +# SIPNET restart workflow for events + +This is a demonstration of a workaround for running SIPNET with event files that include crop changes/rotations. + +## Execution + +1. Install PEcAn locally (`make install`, etc.). +2. Inspect the `config.yml` and add/modify paths or other values as needed. +3. Run the numbered R scripts in order. + +## Organization + +- `config.yml` --- Configuration file for use with the [`config`](https://rstudio.github.io/config/index.html) package. Mostly used for setting machine-specific paths to various inputs. This is pre-populated with inputs for @ashiklom 's local machine and the BU GEO cluster (under the `default` profile). + +- `01-prepare-events.R` --- Read raw events data (in parquet format) and write out ensembles of PEcAn `event.json` files and SIPNET-specific versions thereof +- `02-prepare-settings.R` --- Generate a PEcAn `settings.xml` file populated with machine-specific paths and appropriate configurations. +- `03-run-sipnet.R` --- Run the workflow sequentially. + - Almost all of the functionality is implemented in `utils.R`. 
+- `04-plot-outputs.R` --- Load outputs (using `PEcAn.utils::read.output`) and visualize diff --git a/workflows/sipnet-restart-workflow/config.yml b/workflows/sipnet-restart-workflow/config.yml new file mode 100644 index 00000000000..0b0032960d8 --- /dev/null +++ b/workflows/sipnet-restart-workflow/config.yml @@ -0,0 +1,25 @@ +default: + parcel_id: 39011 + site_id: "cf1c223d4e408116" + outdir_root: "workflows/sipnet-restart-workflow/_test" + irrigation_path: "/projectnb/dietzelab/ccmmf/usr/ashiklom/event-outputs/irrigation_10000.parquet" + parcel_path: "/projectnb/dietzelab/ccmmf/LandIQ-harmonized-v4.1/parcels.gpkg" + dp_path: "/projectnb/dietzelab/ccmmf/management/irrigation/design_points.csv" + sipnet_binary: "/projectnb/dietzelab/ccmmf/usr/ashiklom/pecan/sipnet/sipnet" + met_dir: "/projectnb/dietzelab/ccmmf/ensemble/ERA5_SIPNET" + ic_dir: "/projectnb/dietzelab/ccmmf/ensemble/IC_files" + planting_events_dir: "/projectnb/dietzelab/ccmmf/management/event_files" + mslsp_path: "/projectnb/dietzelab/ccmmf/management/phenology/matched_landiq_mslsp_v4.1" + pft_dir: "/projectnb/dietzelab/ccmmf/ensemble/pfts" + n_ensemble: 3 + +ashiklom: + irrigation_path: !expr path.expand("~/data/irrigation-events-out/irrigation_10000.parquet") + parcel_path: !expr path.expand("~/data/LandIQ-harmonized-v4.1/parcels.gpkg") + dp_path: !expr path.expand("~/data/design_points.csv") + sipnet_binary: !expr normalizePath("../_helpers/sipnet/sipnet.master", mustWork = TRUE) + met_dir: !expr path.expand("~/data/ERA5-site-processed") + ic_dir: !expr path.expand("~/data/IC-site") + planting_events_dir: !expr path.expand("~/data/event-files/planting") + mslsp_path: !expr path.expand("~/data/matched_landiq_mslsp_v4.1") + pft_dir: !expr path.expand("~/data/pfts") diff --git a/workflows/sipnet-restart-workflow/utils.R b/workflows/sipnet-restart-workflow/utils.R new file mode 100644 index 00000000000..ae27a3f732e --- /dev/null +++ b/workflows/sipnet-restart-workflow/utils.R @@ -0,0 +1,279 @@ +#' 
settings for whole run with many paths per input -> +#' settings for one ensemble member with one path per input +#' +#' @param settings single-site settings object (not Multisettings) +#' @param path_nums named list of input indices (one row of the sample design) +subset_paths <- function(settings, path_nums) { + for (input in names(path_nums)) { + if (!is.list(settings$run$inputs[[input]])) { + next + } + path_idx <- path_nums[[input]] + all_paths <- settings$run$inputs[[input]]$path + if (path_idx > length(all_paths)) { + PEcAn.logger::logger.severe("No path at input ", sQuote(input), " index ", path_idx) + } + settings$run$inputs[[input]]$path <- all_paths[[path_idx]] + # If we define a list of `source`s, also try to subset that (if the lengths + # match). This is especially useful for processing events (because we store + # the original JSON path in the `source`). + if (!is.list(settings$run$inputs[[input]]$source)) { + next + } + all_source_paths <- settings$run$inputs[[input]]$source + n_source <- length(all_source_paths) + n_path <- length(all_paths) + if (n_source != n_path) { + PEcAn.logger::logger.warn(sprintf( + paste( + "For input %s, number of paths (%d) ", + "is not equal to number of sources (%d). ", + "Assuming these are different things and therefore leaving sources as is." + ), + input, n_path, n_source + )) + next + } + settings$run$inputs[[input]]$source <- all_source_paths[[path_idx]] + } + settings +} + +# TODO: We need a better, consistent implementation of this. However, this is +# OK as an example of a function that implements the capabilities needed for +# `write_segment_configs`. 
+crop2pft_example <- function(crop_code) { + cls <- substr(crop_code, 1, 1) + dplyr::case_when( + crop_code == "P1" ~ "annual_crop", + crop_code == "G2" ~ "annual_crop", + cls == "D" ~ "temperate.deciduous", + cls == "F" ~ "annual_crop", + cls == "G" ~ "grass", + cls == "P" ~ "grass", + cls == "R" ~ "grass", + is.na(crop_code) ~ "soil", + TRUE ~ "UNKNOWN_PFT" + ) +} + +write_segmented_configs.SIPNET <- function(settings, input_design = NULL, ...) { + manifest_file <- file.path(settings$outdir, "runs_manifest.csv") + if (!file.exists(manifest_file)) { + PEcAn.logger::logger.severe("Could not find manifest file: ", manifest_file) + } + inputs_runs <- read.csv(manifest_file) + if (!is.null(input_design)) { + inputs_runs <- cbind.data.frame(inputs_runs, input_design) + } + + new_jobfiles <- character() + + for (i in seq_len(nrow(inputs_runs))) { + new_jobfiles[[i]] <- write_segment_configs(settings, inputs_runs[i, ], ...) + } + invisible(new_jobfiles) +} + +segment_dataframe <- function(run_settings) { + events_json <- run_settings$run$inputs$events$source + stopifnot(file.exists(events_json)) + + crop_cycles <- PEcAn.data.land::events_to_crop_cycle_starts(events_json) |> + dplyr::ungroup() + + run_start <- as.Date(run_settings$run$start.date) + run_end <- as.Date(run_settings$run$end.date) + + # First, build segment data.frame just from the crop cycles. Note that we + # already include the run end date (lead(..., default = run_end)). + segments <- crop_cycles |> + dplyr::rename(start_date = "date") |> + dplyr::mutate( + end_date = dplyr::lead(.data$start_date - 1, default = run_end), + ) + + # If we start *before* the first planting, add a segment for that. 
+ if (run_start < min(segments[["start_date"]])) { + first_segment <- tibble::tibble( + crop_cycle_id = 0, + site_id = segments[["site_id"]][[1]], + start_date = run_start, + end_date = segments[["start_date"]][[1]] - 1, + crop_code = NA_character_ + ) + segments <- dplyr::bind_rows(first_segment, segments) + } + + # If the run start is *after* the first planting, clip to the start date. + if (run_start > min(segments[["start_date"]])) { + segments <- segments |> + dplyr::filter( + .data$start_date >= .env$run_start, + # Also clip the end date to avoid edge cases where run_start == end_date + .data$end_date > .env$run_start + ) + if (nrow(segments) < 1) { + PEcAn.logger::logger.severe( + "Filtering resulted in no segments. ", + "This is an invalid state; check settings and events.json." + ) + } + segments[1, "start_date"] <- run_start + } + + segments |> + dplyr::arrange(.data$start_date) |> + dplyr::mutate(segment_id = sprintf("%03d", dplyr::row_number())) +} + +write_segment_configs <- function( + settings, + run_row, + crop2pft = crop2pft_example, + replace_and_link = TRUE, + force_rerun = FALSE +) { + run_id <- run_row[["run_id"]] + run_dir <- file.path(settings$rundir, run_id) + run_modeloutdir <- file.path(settings$modeloutdir, run_id) + run_settings <- subset_paths(settings, run_row) + segment_rootdir <- file.path(run_dir, "segments") + + ens_samples_file <- file.path( + run_settings$outdir, + sprintf("ensemble.samples.%s.Rdata", run_settings$ensemble$ensemble.id) + ) + stopifnot(file.exists(ens_samples_file)) + ensemble_samples <- PEcAn.utils::load_local(ens_samples_file)[["ens.samples"]] + i_param <- run_row[["param"]] %||% 1 + run_traits <- lapply(ensemble_samples, \(dat) dat[i_param, ]) + + segments <- segment_dataframe(run_settings) |> + dplyr::mutate( + pft = crop2pft(.data$crop_code), + segment_dir = file.path(segment_rootdir, sprintf("segment_%s", .data$segment_id)) + ) + + jobsh_files <- character() + + for (isegment in seq_len(nrow(segments))) { + 
segment <- segments[isegment, ] + dstart <- segment[["start_date"]] + dend <- segment[["end_date"]] + segment_dir <- segment[["segment_dir"]] + + if (force_rerun) { + unlink(segment_dir, recursive = TRUE) + } + dir.create(segment_dir, showWarnings = FALSE, recursive = TRUE) + + runid_dummy <- "1" + + segment_inputs <- PEcAn.SIPNET::split_inputs.SIPNET( + dstart, + # NOTE: In split_inputs, end.time is *not* inclusive. + # But `dend` *is* inclusive. So `+1` as a workaround here. + (dend + 1), + run_settings$run$inputs, + overwrite = TRUE, + outpath = segment_dir + ) + + # Segment-specific settings + segment_outdir <- file.path(segment_dir, "out") + dir.create(segment_outdir, showWarnings = FALSE, recursive = TRUE) + segment_rundir <- file.path(segment_dir, "run") + dir.create(segment_rundir, showWarnings = FALSE, recursive = TRUE) + file.create(file.path(segment_rundir, "README.txt")) + + segment_rundir_withid <- file.path(segment_rundir, runid_dummy) + dir.create(segment_rundir_withid, showWarnings = FALSE, recursive = TRUE) + segment_outdir_withid <- file.path(segment_outdir, runid_dummy) + dir.create(segment_outdir_withid, showWarnings = FALSE, recursive = TRUE) + + segment_settings <- run_settings + segment_settings[["outdir"]] <- segment_outdir + segment_settings[["modeloutdir"]] <- segment_outdir + segment_settings[["rundir"]] <- segment_rundir + segment_settings[[c("run", "start.date")]] <- dstart + segment_settings[[c("run", "end.date")]] <- dend + segment_settings[[c("run", "inputs")]] <- segment_inputs + if (is.null(segment_settings[[c("model", "options")]])) { + segment_settings[[c("model", "options")]] <- list() + } + + if (isegment > 1) { + # For isegment > 1, we restart from the *previous* segment's restart.out + segment_settings[[c("model", "options", "RESTART_IN")]] <- restart_out + } + # ...and now, define a new restart.out for *this* segment + restart_out <- file.path(segment_rundir, "restart.out") + segment_settings[[c("model", "options", 
"RESTART_OUT")]] <- restart_out + + # trait.values must be a list of pfts, even though SIPNET takes only one + # PFT as input. However, we can pass soil params through a "soil" PFT. + # Here, if the segment PFT is soil, that's what we send. If the segment PFT + # is anything else, we send that *and* the soil params. + choose_pft <- unique(c(segment[["pft"]], "soil")) + segment_traits <- run_traits[choose_pft] + + # Write dummy runs file + writeLines(runid_dummy, file.path(segment_rundir, "runs.txt")) + + PEcAn.SIPNET::write.config.SIPNET( + defaults = segment_settings[["pfts"]], + trait.values = segment_traits, + settings = segment_settings, + run.id = runid_dummy + ) + + segment_jobsh <- file.path(segment_settings$rundir, runid_dummy, "job.sh") + stopifnot(file.exists(segment_jobsh)) + jobsh_files <- c(jobsh_files, segment_jobsh) + } + + # Now, get the run's jobsh file + run_jobsh <- file.path(run_dir, "job.sh") + target_sipnet_out <- file.path(run_modeloutdir, "sipnet.out") + segmented_jobsh_file <- file.path(run_dir, "job_segmented.sh") + segmented_jobsh_lines <- c( + "#!/usr/bin/env bash", + "", + "# Redirect output", + "exec 3>&1", + paste("exec &>", shQuote(file.path(run_modeloutdir, "logfile.txt"))), + "", + "# Run model segments", + paste("bash", jobsh_files), + "", + "# Concatenate sipnet out files", + sprintf( + "Rscript -e \"PEcAn.SIPNET::combine_sipnet_out(directory = %s, outfile = %s)\"", + shQuote(segment_rootdir), + shQuote(target_sipnet_out) + ), + "", + "# Convert output to PEcAn standard", + sprintf( + "Rscript -e \"PEcAn.SIPNET::model2netcdf.SIPNET(%s)\"", + paste( + sprintf("outdir = %s", shQuote(run_modeloutdir)), + sprintf("sitelat = %s", as.character(settings$run$site$lat)), + sprintf("sitelon = %s", as.character(settings$run$site$lon)), + sprintf("start_date = %s", shQuote(settings$run$start.date)), + sprintf("end_date = %s", shQuote(settings$run$end.date)), + sprintf("revision = %s", shQuote(settings$model$revision)), + sep = ", " + ) + ) + 
) + writeLines(segmented_jobsh_lines, segmented_jobsh_file) + if (replace_and_link) { + run_jobsh_backup <- file.path(run_dir, "job_original.sh") + file.rename(run_jobsh, run_jobsh_backup) + file.symlink(segmented_jobsh_file, run_jobsh) + Sys.chmod(run_jobsh, mode = "0755") + } + invisible(segmented_jobsh_file) +}