diff --git a/NAMESPACE b/NAMESPACE index ff85b07..d98bc0c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -136,6 +136,7 @@ importFrom(utils,data) importFrom(utils,menu) importFrom(utils,type.convert) importFrom(vcd,assoc) +importFrom(vcd,assocstats) importFrom(vcd,is.structable) importFrom(vcd,labeling_border) importFrom(vcd,mosaic) diff --git a/R/assoc_graph.R b/R/assoc_graph.R index 7dde728..818a44a 100644 --- a/R/assoc_graph.R +++ b/R/assoc_graph.R @@ -15,16 +15,24 @@ #' @param result Type of result to return: \code{"igraph"} (default) returns an #' \code{\link[igraph:igraph-package]{igraph}} object; \code{"matrix"} returns the #' adjacency matrix; \code{"edge_list"} returns a two-column character matrix of edges. +#' @param measure Type of association measure for edge weights (only for \code{glm} method): +#' \code{"none"} (default) produces an unweighted graph; +#' \code{"chisq"} computes partial chi-squared statistics (deviance change when each +#' edge is removed from the model); +#' \code{"cramer"} computes Cramer's V from the marginal two-way table for each edge. #' @param \dots Additional arguments (currently unused). #' #' @return Depending on \code{result}: #' \itemize{ #' \item \code{"igraph"}: An \code{igraph} undirected graph object of class #' \code{c("assoc_graph", "igraph")}, with vertex names corresponding to -#' the variable names. -#' \item \code{"matrix"}: A symmetric adjacency matrix (0/1) with variable names as -#' row and column names. -#' \item \code{"edge_list"}: A two-column character matrix, each row an edge. +#' the variable names. When \code{measure != "none"}, edge weights are +#' stored as \code{E(g)$weight} and the measure name as \code{g$measure}. +#' \item \code{"matrix"}: A symmetric adjacency matrix with variable names as +#' row and column names. Contains 0/1 when unweighted, or association +#' strength values when \code{measure} is specified. +#' \item \code{"edge_list"}: When unweighted, a two-column character matrix (from, to). +#' When \code{measure} is specified, a data frame with columns from, to, and weight. #' } #' #' @details @@ -82,13 +90,33 @@ #' mod.cond <- glm(Freq ~ (cigarette + alcohol + marijuana)^2 + #' (alcohol + sex + race)^2, #' data = DaytonSurvey, family = poisson) +#' +#' # define groups for the model +#' gps <- list(c("cigarette", "marijuana"), +#' "alcohol", +#' c("sex", "race")) +#' #' assoc_graph(mod.cond) #' plot(assoc_graph(mod.cond), -#' groups = list(c("cigarette", "marijuana"), -#' "alcohol", -#' c("sex", "race")), -#' main = "{Mar,Cig} indep of {Race,Sex} | Alc", -#' layout = igraph::layout_nicely) +#' groups = gps, +#' layout = igraph::layout_nicely, +#' main = "{R,S} indep {M,C} | A") +#' +#' # Weighted graph: partial chi-squared +#' g <- assoc_graph(mod.cond, measure = "chisq") +#' g +#' plot(g, edge.label = TRUE, +#' groups = gps, +#' layout = igraph::layout_nicely, +#' main = "Partial chi-squared weights") +#' +#' # Cramer's V (marginal) +#' g2 <- assoc_graph(mod.cond, measure = "cramer") +#' g2 +#' plot(g2, edge.label = TRUE, +#' groups = gps, +#' layout = igraph::layout_nicely, +#' main = "Cramer's V weights") #' assoc_graph <- function(x, ...) { UseMethod("assoc_graph") @@ -112,11 +140,60 @@ assoc_graph.loglm <- function(x, result = c("igraph", "matrix", "edge_list"), .. } #' @rdname assoc_graph +#' @importFrom vcd assocstats #' @export -assoc_graph.glm <- function(x, result = c("igraph", "matrix", "edge_list"), ...) { +assoc_graph.glm <- function(x, result = c("igraph", "matrix", "edge_list"), + measure = c("none", "chisq", "cramer"), ...) { result <- match.arg(result) + measure <- match.arg(measure) margins <- .glm_to_margins(x) - .margins_to_assoc_graph(margins, result = result) + + if (measure == "none") { + return(.margins_to_assoc_graph(margins, result = result)) + } + + # Compute weights — need the igraph object regardless of result type + + g <- .margins_to_assoc_graph(margins, result = "igraph") + + weights <- switch(measure, + chisq = .compute_chisq_weights(g, x), + cramer = .compute_cramer_weights(g, x) + ) + + g$measure <- measure + + if (igraph::ecount(g) > 0) { + igraph::E(g)$weight <- weights + } + + if (result == "igraph") { + return(g) + } + + if (result == "matrix") { + all_vars <- igraph::V(g)$name + nv <- length(all_vars) + adj <- matrix(0, nv, nv, dimnames = list(all_vars, all_vars)) + if (igraph::ecount(g) > 0) { + el <- igraph::as_edgelist(g) + for (i in seq_len(nrow(el))) { + adj[el[i, 1], el[i, 2]] <- weights[i] + adj[el[i, 2], el[i, 1]] <- weights[i] + } + } + return(adj) + } + + if (result == "edge_list") { + if (igraph::ecount(g) == 0) { + return(matrix(character(0), ncol = 2, dimnames = list(NULL, c("from", "to")))) + } + el <- igraph::as_edgelist(g) + out <- data.frame(from = el[, 1], to = el[, 2], weight = weights, + stringsAsFactors = FALSE) + return(out) + } } @@ -175,6 +252,61 @@ assoc_graph.glm <- function(x, result = c("igraph", "matrix", "edge_list"), ...) } +# --- Helper: margins -> glm formula with hierarchy (using * notation) --- + +.margins_to_glm_formula <- function(margins, response) { + rhs_terms <- vapply(margins, function(m) { + if (length(m) == 1) m else paste(m, collapse = " * ") + }, character(1)) + stats::as.formula(paste(response, "~", paste(rhs_terms, collapse = " + "))) +} + + +# --- Helper: partial chi-squared weights (deviance change per edge) --- + +.compute_chisq_weights <- function(g, model) { + el <- igraph::as_edgelist(g) + if (nrow(el) == 0) return(numeric(0)) + + dev_full <- stats::deviance(model) + resp <- as.character(stats::formula(model)[[2]]) + + weights <- numeric(nrow(el)) + for (i in seq_len(nrow(el))) { + # Remove this edge + eid <- igraph::get.edge.ids(g, c(el[i, 1], el[i, 2])) + g_reduced <- igraph::delete_edges(g, eid) + + # Convert reduced graph -> margins -> glm formula + margins_reduced <- .graph_to_margins(g_reduced) + new_formula <- .margins_to_glm_formula(margins_reduced, resp) + + # Refit and compute deviance change + mod_reduced <- stats::update(model, formula. = new_formula) + weights[i] <- stats::deviance(mod_reduced) - dev_full + } + weights +} + + +# --- Helper: Cramer's V weights (marginal 2-way tables) --- + +.compute_cramer_weights <- function(g, model) { + el <- igraph::as_edgelist(g) + if (nrow(el) == 0) return(numeric(0)) + + mf <- stats::model.frame(model) + freq <- stats::model.response(mf) + + weights <- numeric(nrow(el)) + for (i in seq_len(nrow(el))) { + tab <- stats::xtabs(freq ~ mf[[el[i, 1]]] + mf[[el[i, 2]]]) + weights[i] <- vcd::assocstats(tab)$cramer + } + weights +} + + # --- Helper: extract generating class (margins) from a glm formula --- .glm_to_margins <- function(object) { @@ -225,12 +357,22 @@ print.assoc_graph <- function(x, ...) { if (ne > 0) { el <- igraph::as_edgelist(x) - edge_strings <- paste0(el[, 1], " -- ", el[, 2]) + w <- igraph::E(x)$weight + if (!is.null(w)) { + edge_strings <- paste0(el[, 1], " -- ", el[, 2], " (", round(w, 2), ")") + } else { + edge_strings <- paste0(el[, 1], " -- ", el[, 2]) + } cat("Edges:", paste(edge_strings, collapse = ", "), "\n") } else { cat("Edges: (none -- mutual independence)\n") } + # Show measure if present + if (!is.null(x$measure) && x$measure != "none") { + cat("Measure:", x$measure, "\n") + } + # Show bracket notation margins <- .graph_to_margins(x) cat("Model:", loglin2string(margins), "\n") diff --git a/dev/LRstats-test.R b/dev/LRstats-test.R index 69f27d0..c9e3e2c 100644 --- a/dev/LRstats-test.R +++ b/dev/LRstats-test.R @@ -3,54 +3,6 @@ library(vcdExtra) -# --- Modified methods with label argument --- - -LRstats.glmlist <- function(object, ..., saturated = NULL, sortby = NULL, - label = c("name", "formula"), abbrev = FALSE) { - label <- match.arg(label) - ns <- sapply(object, function(x) length(x$residuals)) - if (any(ns != ns[1L])) - stop("models were not all fitted to the same size of dataset") - nmodels <- length(object) - if (nmodels == 1) - return(LRstats.default(object[[1L]], saturated = saturated)) - - rval <- lapply(object, LRstats.default, saturated = saturated) - rval <- do.call(rbind, rval) - - if (label == "formula") { - rownames(rval) <- get_models(object, abbrev = abbrev) - } - - if (!is.null(sortby)) { - rval <- rval[order(rval[, sortby], decreasing = TRUE), ] - } - rval -} - -LRstats.loglmlist <- function(object, ..., saturated = NULL, sortby = NULL, - label = c("name", "formula"), abbrev = FALSE) { - label <- match.arg(label) - ns <- sapply(object, function(x) length(x$residuals)) - if (any(ns != ns[1L])) - stop("models were not all fitted to the same size of dataset") - nmodels <- length(object) - if (nmodels == 1) - return(LRstats.default(object[[1L]], saturated = saturated)) - - rval <- lapply(object, LRstats.default, saturated = saturated) - rval <- do.call(rbind, rval) - - if (label == "formula") { - rownames(rval) <- get_models(object, abbrev = abbrev) - } - - if (!is.null(sortby)) { - rval <- rval[order(rval[, sortby], decreasing = TRUE), ] - } - rval -} - # --- Examples --- cat("\n=== loglmlist: Sequential joint independence (Titanic) ===\n") diff --git a/dev/assoc-weights-test.R b/dev/assoc-weights-test.R new file mode 100644 index 0000000..63b689f --- /dev/null +++ b/dev/assoc-weights-test.R @@ -0,0 +1,152 @@ +# assoc-weights-test.R -- Test/demo script for edge weights in assoc_graph() +# Phase 2: measure = "chisq" | "cramer" on assoc_graph.glm() + +library(vcdExtra) + +# ---- DaytonSurvey: 5-way data ---------------------------------------- + +data(DaytonSurvey) + +# Conditional independence model: {M,C} indep {R,S} | A +# +# define groups for the model +# [AM][AC][MC][AR][AS][RS] +mod.cond <- glm(Freq ~ (cigarette + alcohol + marijuana)^2 + + (alcohol + sex + race)^2, + data = DaytonSurvey, family = poisson) + +# define groups for the model + +gps <- list(c("cigarette", "marijuana"), + "alcohol", + c("sex", "race")) + +## 1. Unweighted (default) -- should be identical to Phase 1 +g0 <- assoc_graph(mod.cond) +g0 +plot(g0, + groups = gps, + main = "Unweighted (default)", + layout = igraph::layout_nicely) + + +## 2. Partial chi-squared weights +g_chi <- assoc_graph(mod.cond, measure = "chisq") +g_chi +# All weights should be positive (removing an edge worsens fit) +stopifnot(all(igraph::E(g_chi)$weight > 0)) + +plot(g_chi, edge.label = TRUE, + groups = gps, + main = "Partial chi-squared weights") + + +## 3. Cramer's V weights +g_V <- assoc_graph(mod.cond, measure = "cramer") +g_V +# All values in [0, 1] +stopifnot(all(igraph::E(g_V)$weight >= 0 & igraph::E(g_V)$weight <= 1)) + +plot(g_V, edge.label = TRUE, + groups = gps, + main = "Cramer's V weights") + + +## 4. Side-by-side comparison +op <- par(mfrow = c(1, 3)) +# Use a fixed layout so all three are comparable +layout_fixed <- igraph::layout_in_circle(g0) + +plot(g0, layout = layout_fixed, + groups = gps, + main = "Unweighted") + +plot(g_chi, layout = layout_fixed, edge.label = TRUE, + groups = gps, + main = "Chi-squared") + +plot(g_V, layout = layout_fixed, edge.label = TRUE, + groups = gps, + main = "Cramer's V") +par(op) + + +## 5. Result types with weights + +# Weighted adjacency matrix +mat <- assoc_graph(mod.cond, result = "matrix", measure = "chisq") +print(round(mat, 2)) + +# Weighted edge list (returns a data.frame) +edges <- assoc_graph(mod.cond, result = "edge_list", measure = "cramer") +print(edges) + + +# ---- All two-way model: every pair connected -------------------------- + +mod.all2 <- glm(Freq ~ .^2, data = DaytonSurvey, family = poisson) +g_all2 <- assoc_graph(mod.all2, measure = "chisq") +g_all2 + +op <- par(mfrow = c(1, 2)) +plot(g_all2, edge.label = TRUE, + groups = list(c("cigarette", "alcohol", "marijuana"), + c("sex", "race")), + main = "All 2-way: chi-squared") + +g_all2_V <- assoc_graph(mod.all2, measure = "cramer") +plot(g_all2_V, edge.label = TRUE, + groups = list(c("cigarette", "alcohol", "marijuana"), + c("sex", "race")), + main = "All 2-way: Cramer's V") +par(op) + + +# ---- Bartlett data (3-way table via glm) -------------------------------- + +data(Bartlett) +bart_df <- as.data.frame(Bartlett) + +mod.bart <- glm(Freq ~ (Alive + Time + Length)^2, + data = bart_df, family = poisson) + +g_bart <- assoc_graph(mod.bart, measure = "chisq") +g_bart +plot(g_bart, edge.label = TRUE, + main = "Bartlett: all 2-way (chi-squared)") + +g_bart_V <- assoc_graph(mod.bart, measure = "cramer") +g_bart_V +plot(g_bart_V, edge.label = TRUE, + main = "Bartlett: all 2-way (Cramer's V)") + + +# ---- UCBAdmissions via glm --------------------------------------------- + +ucb_df <- as.data.frame(UCBAdmissions) + +mod.ucb <- glm(Freq ~ (Admit + Gender) * Dept, + data = ucb_df, family = poisson) + +g_ucb <- assoc_graph(mod.ucb, measure = "chisq") +g_ucb +plot(g_ucb, edge.label = TRUE, + groups = list("Dept", c("Admit", "Gender")), + main = "Berkeley: [AD][GD] (chi-squared)") + +g_ucb_V <- assoc_graph(mod.ucb, measure = "cramer") +plot(g_ucb_V, edge.label = TRUE, + groups = list("Dept", c("Admit", "Gender")), + main = "Berkeley: [AD][GD] (Cramer's V)") + + +# ---- Sparse model: mutual independence + one edge ---------------------- + +mod.SR <- glm(Freq ~ . + sex*race, data = DaytonSurvey, family = poisson) +g_SR <- assoc_graph(mod.SR, measure = "chisq") +g_SR +# Only one edge, so weight is a single number +stopifnot(length(igraph::E(g_SR)$weight) == 1) + +plot(g_SR, edge.label = TRUE, + main = "Mutual indep + [SR] (chi-squared)") diff --git a/man/assoc_graph.Rd b/man/assoc_graph.Rd index c8f4007..43f05ec 100644 --- a/man/assoc_graph.Rd +++ b/man/assoc_graph.Rd @@ -14,7 +14,12 @@ assoc_graph(x, ...) \method{assoc_graph}{loglm}(x, result = c("igraph", "matrix", "edge_list"), ...) -\method{assoc_graph}{glm}(x, result = c("igraph", "matrix", "edge_list"), ...) +\method{assoc_graph}{glm}( + x, + result = c("igraph", "matrix", "edge_list"), + measure = c("none", "chisq", "cramer"), + ... +) \method{print}{assoc_graph}(x, ...) } @@ -32,16 +37,25 @@ assoc_graph(x, ...) \item{result}{Type of result to return: \code{"igraph"} (default) returns an \code{\link[igraph:igraph-package]{igraph}} object; \code{"matrix"} returns the adjacency matrix; \code{"edge_list"} returns a two-column character matrix of edges.} + +\item{measure}{Type of association measure for edge weights (only for \code{glm} method): +\code{"none"} (default) produces an unweighted graph; +\code{"chisq"} computes partial chi-squared statistics (deviance change when each +edge is removed from the model); +\code{"cramer"} computes Cramer's V from the marginal two-way table for each edge.} } \value{ Depending on \code{result}: \itemize{ \item \code{"igraph"}: An \code{igraph} undirected graph object of class \code{c("assoc_graph", "igraph")}, with vertex names corresponding to -the variable names. -\item \code{"matrix"}: A symmetric adjacency matrix (0/1) with variable names as -row and column names. -\item \code{"edge_list"}: A two-column character matrix, each row an edge. +the variable names. When \code{measure != "none"}, edge weights are +stored as \code{E(g)$weight} and the measure name as \code{g$measure}. +\item \code{"matrix"}: A symmetric adjacency matrix with variable names as +row and column names. Contains 0/1 when unweighted, or association +strength values when \code{measure} is specified. +\item \code{"edge_list"}: When unweighted, a two-column character matrix (from, to). +When \code{measure} is specified, a data frame with columns from, to, and weight. } } \description{ @@ -88,13 +102,33 @@ plot(assoc_graph(mod.SR), main = "Mutual indep. + [SR]") mod.cond <- glm(Freq ~ (cigarette + alcohol + marijuana)^2 + (alcohol + sex + race)^2, data = DaytonSurvey, family = poisson) + +# define groups for the model +gps <- list(c("cigarette", "marijuana"), + "alcohol", + c("sex", "race")) + assoc_graph(mod.cond) plot(assoc_graph(mod.cond), - groups = list(c("cigarette", "marijuana"), - "alcohol", - c("sex", "race")), - main = "{Mar,Cig} indep of {Race,Sex} | Alc", - layout = igraph::layout_nicely) + groups = gps, + layout = igraph::layout_nicely, + main = "{R,S} indep {M,C} | A") + +# Weighted graph: partial chi-squared +g <- assoc_graph(mod.cond, measure = "chisq") +g +plot(g, edge.label = TRUE, + groups = gps, + layout = igraph::layout_nicely, + main = "Partial chi-squared weights") + +# Cramer's V (marginal) +g2 <- assoc_graph(mod.cond, measure = "cramer") +g2 +plot(g2, edge.label = TRUE, + groups = gps, + layout = igraph::layout_nicely, + main = "Cramer's V weights") } \references{