diff --git a/R/frs_habitat.R b/R/frs_habitat.R index 069cd7b..89825bc 100644 --- a/R/frs_habitat.R +++ b/R/frs_habitat.R @@ -1213,10 +1213,12 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, if (!is.null(wb_type)) { wb_poly <- .frs_waterbody_tables(wb_type) + sc <- ps[["rules"]][["spawn_connected"]] .frs_connected_waterbody(conn, table, habitat, species = sp, waterbody_poly = wb_poly, waterbody_ha_min = wb_ha_min, bridge_gradient = bg, - distance_max = bd, verbose = verbose) + distance_max = bd, spawn_connected = sc, + verbose = verbose) } else { # Generic cluster approach for non-lake rearing dir <- if (is.na(fp$cluster_spawn_direction)) "both" else @@ -1429,6 +1431,11 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, #' @param bridge_gradient Numeric. Gradient threshold for downstream trace #' barrier detection. #' @param distance_max Numeric. Maximum downstream trace distance (metres). +#' @param spawn_connected Named list or `NULL`. Permissive spawn thresholds +#' for the additive step (Phase 3). When non-NULL, accessible segments +#' in the downstream trace that meet these thresholds get `spawning = TRUE` +#' even if they failed standard classification. Keys: `gradient_max`, +#' `channel_width_min`, `edge_types`, `edge_types_explicit`. #' @param verbose Logical. Print before/after counts. #' @noRd .frs_connected_waterbody <- function(conn, table, habitat, @@ -1436,6 +1443,7 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, waterbody_ha_min = 200, bridge_gradient = 0.05, distance_max = 3000, + spawn_connected = NULL, verbose = TRUE) { sp_quoted <- .frs_quote_string(species) lhm <- .frs_sql_num(waterbody_ha_min) @@ -1478,7 +1486,7 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, SELECT seg.id_segment FROM %s seg WHERE seg.linear_feature_id IN (SELECT linear_feature_id FROM %s)", qual_tbl, table, lfid_tbl)) - .frs_db_execute(conn, sprintf("DROP TABLE IF EXISTS %s", lfid_tbl)) + # Keep lfid_tbl alive for Phase 3 additive step # Phase 2: Upstream — spawn-eligible segments upstream of rearing, # clustered, kept only if cluster touches qualifying waterbody polygon @@ -1538,6 +1546,46 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, AND id_segment NOT IN (SELECT id_segment FROM %s)", habitat, sp_quoted, qual_tbl)) + # Phase 3: Additive — promote accessible segments in the downstream + # trace that meet spawn_connected thresholds but failed standard + # classification. Uses permissive gradient/channel_width from the + # spawn_connected config. + if (!is.null(spawn_connected)) { + sc_where <- sprintf( + "s.gradient <= %s", + .frs_sql_num(spawn_connected[["gradient_max"]])) + + cw_min <- spawn_connected[["channel_width_min"]] + if (!is.null(cw_min) && cw_min > 0) { + sc_where <- paste(sc_where, "AND", sprintf( + "s.channel_width >= %s", .frs_sql_num(cw_min))) + } + + # Edge type filter if present + if (!is.null(spawn_connected[["edge_types"]])) { + et_codes <- frs_edge_types(spawn_connected[["edge_types"]]) + sc_where <- paste(sc_where, "AND", sprintf( + "s.edge_type IN (%s)", paste(et_codes, collapse = ", "))) + } + if (!is.null(spawn_connected[["edge_types_explicit"]])) { + codes <- as.integer(spawn_connected[["edge_types_explicit"]]) + sc_where <- paste(sc_where, "AND", sprintf( + "s.edge_type IN (%s)", paste(codes, collapse = ", "))) + } + + .frs_db_execute(conn, sprintf( + "UPDATE %s h SET spawning = TRUE + FROM %s s + WHERE h.id_segment = s.id_segment + AND h.species_code = %s + AND h.accessible IS TRUE + AND h.spawning IS FALSE + AND s.linear_feature_id IN (SELECT linear_feature_id FROM %s) + AND %s", + habitat, table, sp_quoted, lfid_tbl, sc_where)) + } + + .frs_db_execute(conn, sprintf("DROP TABLE IF EXISTS %s", lfid_tbl)) .frs_db_execute(conn, sprintf("DROP TABLE IF EXISTS %s", qual_tbl)) if (verbose) { diff --git a/R/frs_params.R b/R/frs_params.R index 786dd2a..1726145 100644 --- a/R/frs_params.R +++ b/R/frs_params.R @@ -17,7 +17,8 @@ #' `inst/extdata/parameters_habitat_rules.yaml`. Pass `NULL` to skip #' rules entirely (every species falls through to the CSV ranges #' path used pre-0.12.0). When a rules file is loaded, each species -#' listed in the file gets `$rules$spawn` and `$rules$rear` attached +#' listed in the file gets `$rules$spawn`, `$rules$rear`, and +#' optionally `$rules$spawn_connected` attached #' to its params entry. Species not listed in the file fall through #' to the CSV ranges path. See the `parameters_habitat_rules.yaml` #' header for the rule format. @@ -139,11 +140,15 @@ frs_params <- function(conn = NULL, for (sp in names(raw)) { sp_block <- raw[[sp]] for (habitat in names(sp_block)) { - if (!habitat %in% c("spawn", "rear")) { + if (!habitat %in% c("spawn", "rear", "spawn_connected")) { stop(sprintf( - "rules YAML for species %s has unknown habitat block '%s' (expected 'spawn' or 'rear')", + "rules YAML for species %s has unknown habitat block '%s' (expected 'spawn', 'rear', or 'spawn_connected')", sp, habitat), call. = FALSE) } + if (habitat == "spawn_connected") { + .frs_validate_spawn_connected(sp_block[[habitat]], sp) + next + } rule_list <- sp_block[[habitat]] if (is.null(rule_list) || length(rule_list) == 0) next for (i in seq_along(rule_list)) { @@ -289,6 +294,75 @@ frs_params <- function(conn = NULL, } +#' Validate a spawn_connected configuration block +#' +#' Unlike spawn/rear (which are lists of rules), spawn_connected is a +#' single configuration block with fixed keys. Errors on unknown keys, +#' missing required keys, or invalid values. +#' +#' @param block Named list from the YAML. +#' @param sp Character. Species code (for error messages). +#' @noRd +.frs_validate_spawn_connected <- function(block, sp) { + if (!is.list(block) || is.null(names(block))) { + stop(sprintf( + "rules YAML %s/spawn_connected must be a named mapping", + sp), call. = FALSE) + } + + valid_keys <- c("direction", "waterbody_type", "gradient_max", + "channel_width_min", "distance_max", "bridge_gradient", + "edge_types", "edge_types_explicit") + unknown <- setdiff(names(block), valid_keys) + if (length(unknown) > 0) { + stop(sprintf( + "rules YAML %s/spawn_connected has unknown keys: %s. Valid: %s", + sp, paste(unknown, collapse = ", "), + paste(valid_keys, collapse = ", ")), call. = FALSE) + } + + # Required keys + required <- c("direction", "waterbody_type", "gradient_max", + "distance_max", "bridge_gradient") + missing_keys <- setdiff(required, names(block)) + if (length(missing_keys) > 0) { + stop(sprintf( + "rules YAML %s/spawn_connected missing required keys: %s", + sp, paste(missing_keys, collapse = ", ")), call. = FALSE) + } + + if (!block[["direction"]] %in% c("upstream", "downstream", "both")) { + stop(sprintf( + "rules YAML %s/spawn_connected direction must be 'upstream', 'downstream', or 'both'", + sp), call. = FALSE) + } + + if (!block[["waterbody_type"]] %in% c("L", "R", "W")) { + stop(sprintf( + "rules YAML %s/spawn_connected waterbody_type must be L, R, or W", + sp), call. = FALSE) + } + + for (key in c("gradient_max", "distance_max", "bridge_gradient")) { + val <- block[[key]] + if (!is.numeric(val) || length(val) != 1) { + stop(sprintf( + "rules YAML %s/spawn_connected %s must be a numeric scalar", + sp, key), call. = FALSE) + } + } + + if (!is.null(block[["channel_width_min"]])) { + val <- block[["channel_width_min"]] + if (!is.numeric(val) || length(val) != 1) { + stop(sprintf( + "rules YAML %s/spawn_connected channel_width_min must be a numeric scalar", + sp), call. = FALSE) + } + } +} + + #' Build ranges list from a parameter row #' #' Extracts spawn and rear threshold ranges from a flat parameter list into diff --git a/planning/active/findings.md b/planning/active/findings.md new file mode 100644 index 0000000..4ee04a2 --- /dev/null +++ b/planning/active/findings.md @@ -0,0 +1,34 @@ +# Findings + +## Problem +SK spawning near lake outlets uses more permissive thresholds than standard spawning — wider gradient tolerance, no channel width minimum, all edge types. The current subtractive-only approach removes spawning not in the trace but can't ADD spawning for segments that fail standard classification but meet the connected rules. BULK SK: 22.04 km vs 24.38 km bcfishpass (-9.6%). + +## spawn_connected block format +Single configuration block per species (not a list of rules like spawn/rear): +```yaml +SK: + spawn_connected: + direction: downstream + waterbody_type: L + gradient_max: 0.05 + channel_width_min: 0.0 + distance_max: 3000.0 + bridge_gradient: 0.05 +``` + +Valid keys: direction, waterbody_type, gradient_max, channel_width_min, distance_max, bridge_gradient. +Own validation — does NOT mix with spawn/rear valid_predicates. +Never inherits from CSV thresholds — defines its own. + +## Flow in .frs_connected_waterbody +1. Phase 1: downstream trace from waterbody outlets → lfid_tbl (keep alive) +2. Phase 2: upstream cluster + waterbody proximity → subtractive prune +3. **Phase 3 (new)**: additive step — set spawning = TRUE for accessible segments in lfid_tbl that meet spawn_connected thresholds +4. Drop lfid_tbl + +## Design decisions +- Fresh ships its own default rules YAML. Link ships bcfishpass-specific YAML separately. Fresh parses whatever YAML it receives. +- spawn_connected is a single config block, not a rule list — no OR semantics needed. +- Accessible gate: only accessible IS TRUE segments get promoted. +- edge_types absent = all edge types qualify in trace zone. +- channel_width_min = 0 means no channel width filter. diff --git a/planning/active/progress.md b/planning/active/progress.md new file mode 100644 index 0000000..4a82b94 --- /dev/null +++ b/planning/active/progress.md @@ -0,0 +1,9 @@ +# Progress + +## Session 2026-04-14 +- Created branch `154-spawn-connected` from main (5a4247d) +- Read issue #154, clarified 6 design questions with user +- Phase 1 complete: parser recognizes spawn_connected block (c799e37) +- Phase 2+3 complete: additive step in .frs_connected_waterbody + dispatcher wired +- 705 tests pass, code-check clean +- Next: Phase 4 — PR diff --git a/planning/active/task_plan.md b/planning/active/task_plan.md new file mode 100644 index 0000000..858c6ba --- /dev/null +++ b/planning/active/task_plan.md @@ -0,0 +1,22 @@ +# Task Plan: Issue #154 — spawn_connected rules + +## Phase 1: Parser — recognize spawn_connected block +- [x] Add `spawn_connected` as valid habitat block in `.frs_load_rules()` (alongside `spawn`, `rear`) +- [x] Add `.frs_validate_spawn_connected()` — validate fixed keys +- [x] Attach `$rules$spawn_connected` to species params (flows through existing attachment code) +- [x] Tests: 5 new tests (valid parse, unknown keys, missing keys, invalid direction, params attachment) +- [x] 102 params tests pass, code-check clean +- [x] Commit + code-check + +## Phase 2+3: Additive step + wire through dispatcher +- [x] Keep `lfid_tbl` alive past Phase 1 for Phase 3 +- [x] Add `spawn_connected` param to `.frs_connected_waterbody()` +- [x] Phase 3 additive UPDATE: spawning = TRUE for accessible segments in trace meeting permissive thresholds +- [x] Edge type filter (optional), channel_width_min (skip if 0), gradient_max (always) +- [x] Wire through: `ps[["rules"]][["spawn_connected"]]` → `.frs_connected_waterbody(spawn_connected=)` +- [x] Drop lfid_tbl after Phase 3 +- [x] 705 tests pass, code-check clean +- [x] Commit + code-check + +## Phase 4: PR +- [ ] Push, create PR with SRED tag diff --git a/tests/testthat/test-frs_params.R b/tests/testthat/test-frs_params.R index 8b55b7d..e979fad 100644 --- a/tests/testthat/test-frs_params.R +++ b/tests/testthat/test-frs_params.R @@ -193,6 +193,97 @@ test_that(".frs_load_rules errors on non-character edge_types", { }) +# --- spawn_connected validation tests --- + +test_that(".frs_load_rules accepts valid spawn_connected block", { + tmp <- tempfile(fileext = ".yaml") + on.exit(unlink(tmp)) + writeLines(c( + "SK:", + " spawn:", + " - edge_types:", + " - stream", + " rear:", + " - waterbody_type: L", + " lake_ha_min: 200", + " spawn_connected:", + " direction: downstream", + " waterbody_type: L", + " gradient_max: 0.05", + " channel_width_min: 0.0", + " distance_max: 3000", + " bridge_gradient: 0.05"), tmp) + expect_silent(rules <- .frs_load_rules(tmp)) + expect_true("spawn_connected" %in% names(rules$SK)) + expect_equal(rules$SK$spawn_connected$direction, "downstream") + expect_equal(rules$SK$spawn_connected$gradient_max, 0.05) +}) + +test_that(".frs_load_rules errors on unknown spawn_connected keys", { + tmp <- tempfile(fileext = ".yaml") + on.exit(unlink(tmp)) + writeLines(c( + "SK:", + " spawn_connected:", + " direction: downstream", + " waterbody_type: L", + " gradient_max: 0.05", + " distance_max: 3000", + " bridge_gradient: 0.05", + " bogus_key: 42"), tmp) + expect_error(.frs_load_rules(tmp), "unknown keys.*bogus_key") +}) + +test_that(".frs_load_rules errors on missing required spawn_connected keys", { + tmp <- tempfile(fileext = ".yaml") + on.exit(unlink(tmp)) + writeLines(c( + "SK:", + " spawn_connected:", + " direction: downstream", + " waterbody_type: L"), tmp) + expect_error(.frs_load_rules(tmp), "missing required keys") +}) + +test_that(".frs_load_rules errors on invalid spawn_connected direction", { + tmp <- tempfile(fileext = ".yaml") + on.exit(unlink(tmp)) + writeLines(c( + "SK:", + " spawn_connected:", + " direction: sideways", + " waterbody_type: L", + " gradient_max: 0.05", + " distance_max: 3000", + " bridge_gradient: 0.05"), tmp) + expect_error(.frs_load_rules(tmp), "direction must be") +}) + +test_that("frs_params attaches spawn_connected to species params", { + csv <- system.file("extdata", "parameters_habitat_thresholds.csv", + package = "fresh") + tmp <- tempfile(fileext = ".yaml") + on.exit(unlink(tmp)) + writeLines(c( + "SK:", + " spawn:", + " - edge_types:", + " - stream", + " rear:", + " - waterbody_type: L", + " lake_ha_min: 200", + " spawn_connected:", + " direction: downstream", + " waterbody_type: L", + " gradient_max: 0.05", + " distance_max: 3000", + " bridge_gradient: 0.05"), tmp) + params <- frs_params(csv = csv, rules_yaml = tmp) + expect_true(!is.null(params[["SK"]]$rules$spawn_connected)) + expect_equal(params[["SK"]]$rules$spawn_connected$gradient_max, 0.05) +}) + + # --- Rule evaluator tests --- test_that(".frs_rule_to_sql edge_types translates via frs_edge_types", {