From bb9a52c087d953a492282f9fc36b266d3d59e1b3 Mon Sep 17 00:00:00 2001 From: gw Date: Sat, 18 Apr 2026 07:04:05 -0400 Subject: [PATCH 1/2] Properly import pecotmr functions --- DESCRIPTION | 1 + NAMESPACE | 7 +++ R/qQTLR-package.R | 10 ++++ R/quail_vqtl.R | 107 ++++++++++--------------------------- R/quantile_twas.R | 2 +- R/quantile_twas_pipeline.R | 8 +-- R/quantile_twas_weight.R | 12 ++--- R/zzz.R | 17 ++++++ man/qQTLR-package.Rd | 29 ++++++++++ 9 files changed, 103 insertions(+), 90 deletions(-) create mode 100644 R/qQTLR-package.R create mode 100644 R/zzz.R create mode 100644 man/qQTLR-package.Rd diff --git a/DESCRIPTION b/DESCRIPTION index de0b4ef..5bf3335 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,6 +18,7 @@ Imports: pecotmr, readr, stats, + susieR, tidyr, vroom Suggests: diff --git a/NAMESPACE b/NAMESPACE index 1825714..1fb6b97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,12 @@ importFrom(dplyr,"%>%") importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(parallel,mclapply) +importFrom(pecotmr,clean_context_names) +importFrom(pecotmr,filter_variants_by_ld_reference) +importFrom(pecotmr,find_data) +importFrom(pecotmr,harmonize_twas) +importFrom(pecotmr,parse_variant_id) +importFrom(pecotmr,twas_analysis) importFrom(stats,as.dist) importFrom(stats,cor) importFrom(stats,cov) @@ -22,5 +28,6 @@ importFrom(stats,cutree) importFrom(stats,hclust) importFrom(stats,rnorm) importFrom(stats,var) +importFrom(susieR,univariate_regression) importFrom(tidyr,pivot_wider) importFrom(tidyr,separate) diff --git a/R/qQTLR-package.R b/R/qQTLR-package.R new file mode 100644 index 0000000..2ced6b5 --- /dev/null +++ b/R/qQTLR-package.R @@ -0,0 +1,10 @@ +#' qQTLR: Quantile QTL Analysis +#' +#' Tools for quantile QTL analysis, including QUAIL vQTL mapping, +#' quantile TWAS weight training, and quantile TWAS pipelines. +#' +#' @importFrom pecotmr parse_variant_id harmonize_twas twas_analysis find_data +#' clean_context_names filter_variants_by_ld_reference +#' @importFrom susieR univariate_regression +#' @keywords internal +"_PACKAGE" diff --git a/R/quail_vqtl.R b/R/quail_vqtl.R index 6df9c41..524841a 100644 --- a/R/quail_vqtl.R +++ b/R/quail_vqtl.R @@ -1,21 +1,17 @@ -#' Perform Univariate Regression for Genotype Data +#' Univariate regression with z/p/q augmentation #' -#' @param X Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs. +#' Thin wrapper around \code{susieR::univariate_regression} that strips rows +#' with NA in \code{y}, then augments the returned list with z-scores, +#' p-values, q-values, and names derived from \code{colnames(X)}. +#' +#' @param X Numeric matrix of genotypes (n x p). #' @param y Numeric vector of phenotypes (length n). -#' @param Z Optional numeric matrix of covariates (n x k), where k is the number of covariates. -#' @param center Logical, whether to center the data (default: TRUE). -#' @param scale Logical, whether to scale the data (default: FALSE). +#' @param Z Optional numeric matrix of covariates (n x k). +#' @param center Logical, whether to center (default: TRUE). +#' @param scale Logical, whether to scale (default: FALSE). #' @param return_residuals Logical, whether to return residuals (default: FALSE). -#' @return A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}. -#' @examples -#' X <- matrix(rnorm(1000), 100, 10) -#' y <- rnorm(100) -#' results <- univariate_regression(X, y) -#' @noRd -calc_stderr <- function(X, residuals) { - sqrt(diag(sum(residuals^2) / (nrow(X) - 2) * chol2inv(chol(crossprod(X))))) -} - +#' @return A list with \code{betahat}, \code{sebetahat}, \code{z_scores}, +#' \code{p_values}, \code{q_values}, and optionally \code{residuals}. #' @noRd univariate_regression <- function(X, y, Z = NULL, center = TRUE, scale = FALSE, return_residuals = FALSE) { @@ -23,73 +19,26 @@ univariate_regression <- function(X, y, Z = NULL, center = TRUE, if (length(y_na)) { X <- X[-y_na, ] y <- y[-y_na] + if (!is.null(Z)) Z <- Z[-y_na, , drop = FALSE] } - if (center) { - y <- y - mean(y) - X <- scale(X, center = TRUE, scale = scale) - } else { - X <- scale(X, center = FALSE, scale = scale) - } - - X[is.nan(X)] <- 0 - - if (!is.null(Z)) { - if (center) { - Z <- scale(Z, center = TRUE, scale = scale) - } - residual_x <- X # Store X before modifying y - y <- .lm.fit(Z, y)$residuals - } else { - residual_x <- X - } - - output <- try(do.call( - rbind, - lapply(1:ncol(X), function(i) { - g <- .lm.fit(cbind(1, X[, i]), y) - return(c(coef(g)[2], calc_stderr(cbind(1, X[, i]), g$residuals)[2])) - }) - ), silent = TRUE) - - # Exception handling - if (inherits(output, "try-error")) { - output <- matrix(0, ncol(X), 2) - for (i in 1:ncol(X)) { - fit <- summary(lm(y ~ X[, i]))$coef - if (nrow(fit) == 2) { - output[i, ] <- as.vector(summary(lm(y ~ X[, i]))$coef[2, 1:2]) - } else { - output[i, ] <- c(0, 0) - } - } - } - - # Calculate z-scores (t-statistics) - z_scores <- output[, 1] / output[, 2] - - # Calculate p-values from z-scores - p_values <- 2 * pnorm(abs(z_scores), lower.tail = FALSE) - - # Calculate q-values using the provided function - q_values <- pecotmr:::compute_qvalues(p_values) - - # Use residual_x's column names as the column names for the results - rownames(output) <- colnames(residual_x) - - result_list <- list( - betahat = setNames(output[, 1], rownames(output)), - sebetahat = setNames(output[, 2], rownames(output)), - z_scores = setNames(z_scores, rownames(output)), - p_values = setNames(p_values, rownames(output)), - q_values = setNames(q_values, rownames(output)) + nm <- colnames(X) + result <- susieR::univariate_regression( + X, y, + Z = Z, center = center, scale = scale, + return_residuals = return_residuals ) - if (return_residuals && !is.null(Z)) { - result_list$residuals <- y - } - - return(result_list) + z_scores <- result$betahat / result$sebetahat + p_values <- 2 * pnorm(abs(z_scores), lower.tail = FALSE) + q_values <- compute_qvalues(p_values) + + result$betahat <- setNames(result$betahat, nm) + result$sebetahat <- setNames(result$sebetahat, nm) + result$z_scores <- setNames(z_scores, nm) + result$p_values <- setNames(p_values, nm) + result$q_values <- setNames(q_values, nm) + result } @@ -117,7 +66,7 @@ run_linear_regression <- function(genotype, phenotype, covariates = NULL, phenot scale = FALSE ) - snp_info <- pecotmr::parse_variant_id(colnames(genotype)) + snp_info <- parse_variant_id(colnames(genotype)) data.frame( phenotype_id = if (!is.null(phenotype_id)) phenotype_id else NA, diff --git a/R/quantile_twas.R b/R/quantile_twas.R index 088ce22..4765f83 100644 --- a/R/quantile_twas.R +++ b/R/quantile_twas.R @@ -377,7 +377,7 @@ load_quantile_twas_weights <- function(weight_db_files, tau_values = seq(0.01, 0 if (!is.null(context_data$twas_weight) && !is.null(context_data$pseudo_R2)) { list( gene = gene, - context = pecotmr::clean_context_names(context, gene), + context = clean_context_names(context, gene), data = context_data ) } else { diff --git a/R/quantile_twas_pipeline.R b/R/quantile_twas_pipeline.R index 6d79a17..2b2cfe6 100644 --- a/R/quantile_twas_pipeline.R +++ b/R/quantile_twas_pipeline.R @@ -165,7 +165,7 @@ quantile_twas_pipeline <- function(twas_weights_data, } # Step 1: Harmonize TWAS data - twas_data_qced_result <- pecotmr::harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file, + twas_data_qced_result <- harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file, ld_reference_sample_size = ld_reference_sample_size, column_file_path = column_file_path, comment_string = comment_string) twas_results_db <- lapply(names(twas_weights_data), function(weight_db) { @@ -202,7 +202,7 @@ quantile_twas_pipeline <- function(twas_weights_data, if (length(twas_variants) == 0) { return(list(twas_rs_df = data.frame())) } - twas_rs <- pecotmr::twas_analysis( + twas_rs <- twas_analysis( twas_data_qced[[weight_db]][["weights_qced"]][[context]][[study]][["weights"]], twas_data_qced[[weight_db]][["gwas_qced"]][[study]], twas_data_qced[[weight_db]][["LD"]], twas_variants @@ -212,8 +212,8 @@ quantile_twas_pipeline <- function(twas_weights_data, } twas_rs_df <- data.frame( gwas_study = study, method = sub("_[^_]+$", "", names(twas_rs)), - twas_z = pecotmr::find_data(twas_rs, c(2, "z")), - twas_pval = pecotmr::find_data(twas_rs, c(2, "pval")), + twas_z = find_data(twas_rs, c(2, "z")), + twas_pval = find_data(twas_rs, c(2, "pval")), context = context, molecular_id = weight_db ) return(list(twas_rs_df = twas_rs_df)) diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R index 29dbb66..c33fdfd 100644 --- a/R/quantile_twas_weight.R +++ b/R/quantile_twas_weight.R @@ -82,7 +82,7 @@ qr_screen <- function( pvec[ip] <- pvalue } - pvec <- apply(quantile.pvalue, 1, pecotmr:::pval_cauchy) + pvec <- apply(quantile.pvalue, 1, pval_cauchy) if (screen_method == "fdr") { adjusted_pvalues <- p.adjust(pvec) @@ -90,10 +90,10 @@ qr_screen <- function( method_quantile_names <- paste0("fdr_p_qr_", tau.list) quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, p.adjust) } else if (screen_method == "qvalue") { - adjusted_pvalues <- pecotmr:::compute_qvalues(pvec) + adjusted_pvalues <- compute_qvalues(pvec) method_col_name <- "qvalue_qr" method_quantile_names <- paste0("qvalue_qr_", tau.list) - quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, pecotmr:::compute_qvalues) + quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, compute_qvalues) } else { stop("Invalid screen_method. Choose 'fdr' or 'qvalue'.") } @@ -138,7 +138,7 @@ qr_screen <- function( } # Split variant_id and reorder columns - parsed <- pecotmr::parse_variant_id(df_result$variant_id) + parsed <- parse_variant_id(df_result$variant_id) df_result <- df_result %>% mutate(chr = parsed$chrom, pos = parsed$pos, A2 = parsed$A2, A1 = parsed$A1) @@ -354,7 +354,7 @@ perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by values_from = predictor_coef, names_prefix = "coef_qr_" ) - parsed_ids <- pecotmr::parse_variant_id(result_table_wide$variant_id) + parsed_ids <- parse_variant_id(result_table_wide$variant_id) result_table_wide <- result_table_wide %>% mutate(chr = parsed_ids$chrom, pos = parsed_ids$pos, A2 = parsed_ids$A2, A1 = parsed_ids$A1) %>% select(chr, pos, A2, A1, everything()) @@ -959,7 +959,7 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id message("Starting LD panel filtering...") ld_result <- tryCatch( { - variants_kept <- pecotmr::filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file) + variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file) if (length(variants_kept$data) == 0) NULL else variants_kept }, error = function(e) { diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..e1521d9 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,17 @@ +# Lazy-bind unexported pecotmr helpers into qQTLR's namespace. +# Resolving at load time via utils::getFromNamespace means call sites can use +# bare names (no `pecotmr:::` prefix) without triggering the R CMD check +# warning "':::' calls which should be '::'". + +compute_qvalues <- NULL +pval_cauchy <- NULL + +.onLoad <- function(libname, pkgname) { + ns_self <- asNamespace(pkgname) + assign("compute_qvalues", + utils::getFromNamespace("compute_qvalues", "pecotmr"), + envir = ns_self) + assign("pval_cauchy", + utils::getFromNamespace("pval_cauchy", "pecotmr"), + envir = ns_self) +} diff --git a/man/qQTLR-package.Rd b/man/qQTLR-package.Rd new file mode 100644 index 0000000..6f1eecc --- /dev/null +++ b/man/qQTLR-package.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qQTLR-package.R +\docType{package} +\name{qQTLR-package} +\alias{qQTLR} +\alias{qQTLR-package} +\title{qQTLR: Quantile QTL Analysis} +\description{ +Tools for quantile QTL analysis, including QUAIL vQTL mapping, +quantile TWAS weight training, and quantile TWAS pipelines. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/StatFunGen/qQTLR} + \item Report bugs at \url{https://github.com/StatFunGen/qQTLR/issues} +} + +} +\author{ +\strong{Maintainer}: Anjing Liu \email{anjing.liu@example.com} + +Other contributors: +\itemize{ + \item Gao Wang [contributor] +} + +} +\keyword{internal} From 2c6ec447474157f90f91c6a8f9baf0e5ee5704de Mon Sep 17 00:00:00 2001 From: gw Date: Sat, 18 Apr 2026 07:32:07 -0400 Subject: [PATCH 2/2] Delegate LD pruning / clumping to pecotmr Removed three local helpers (corr_filter, check_remove_highcorr_snp, remove_highcorr_snp) and consume pecotmr equivalents instead: - ld_prune_by_correlation replaces corr_filter - enforce_design_full_rank replaces check_remove_highcorr_snp (exported, imported via @importFrom) - drop_collinear_columns replaces remove_highcorr_snp (unexported in pecotmr; bound via getFromNamespace in zzz.R) multicontext_ld_clumping rewritten as a thin orchestrator over pecotmr::ld_clump_by_score: chr/pos parsed once, per-tau clumping via -log10(p) scoring, final clump over the union scored by MAF. All bigsnpr/bigstatsr boilerplate (FBM wrapping, single-variant shortcut) now lives in pecotmr. Migrated the corresponding testthat coverage to pecotmr. Kept qQTLR integration tests that exercise enforce_design_full_rank via the imported symbol and the warning path. Co-Authored-By: Claude Opus 4.6 --- NAMESPACE | 7 +- R/qQTLR-package.R | 1 + R/quantile_twas_weight.R | 362 +++------------------------- R/zzz.R | 14 +- man/corr_filter.Rd | 19 -- man/multicontext_ld_clumping.Rd | 23 +- tests/testthat/test_quantile_twas.R | 216 +---------------- 7 files changed, 75 insertions(+), 567 deletions(-) delete mode 100644 man/corr_filter.Rd diff --git a/NAMESPACE b/NAMESPACE index 1fb6b97..8eb76f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,25 +9,24 @@ export(perform_qr_analysis) export(qr_screen) export(quantile_twas_pipeline) export(quantile_twas_weight_pipeline) -importFrom(bigsnpr,snp_clumping) -importFrom(bigstatsr,FBM.code256) importFrom(dplyr,"%>%") importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(parallel,mclapply) importFrom(pecotmr,clean_context_names) +importFrom(pecotmr,enforce_design_full_rank) importFrom(pecotmr,filter_variants_by_ld_reference) importFrom(pecotmr,find_data) importFrom(pecotmr,harmonize_twas) +importFrom(pecotmr,ld_clump_by_score) +importFrom(pecotmr,ld_prune_by_correlation) importFrom(pecotmr,parse_variant_id) importFrom(pecotmr,twas_analysis) importFrom(stats,as.dist) -importFrom(stats,cor) importFrom(stats,cov) importFrom(stats,cutree) importFrom(stats,hclust) importFrom(stats,rnorm) -importFrom(stats,var) importFrom(susieR,univariate_regression) importFrom(tidyr,pivot_wider) importFrom(tidyr,separate) diff --git a/R/qQTLR-package.R b/R/qQTLR-package.R index 2ced6b5..1cc3bf5 100644 --- a/R/qQTLR-package.R +++ b/R/qQTLR-package.R @@ -5,6 +5,7 @@ #' #' @importFrom pecotmr parse_variant_id harmonize_twas twas_analysis find_data #' clean_context_names filter_variants_by_ld_reference +#' ld_prune_by_correlation ld_clump_by_score enforce_design_full_rank #' @importFrom susieR univariate_regression #' @keywords internal "_PACKAGE" diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R index c33fdfd..8307b84 100644 --- a/R/quantile_twas_weight.R +++ b/R/quantile_twas_weight.R @@ -166,121 +166,83 @@ qr_screen <- function( )) } -#' Perform Clumping and Pruning -#' @param X Matrix of genotypes -#' @param qr_results Results from QR_screen -#' @param maf_list List of minor allele frequencies (optional) -#' @param ld_clump_r2 R-squared threshold for initial LD clumping based on pvalue -#' @param final_clump_r2 R-squared threshold for final LD clumping based on MAF -#' @return A list containing final SNPs and clumped SNPs -#' @importFrom bigstatsr FBM.code256 -#' @importFrom bigsnpr snp_clumping +#' Perform Clumping and Pruning Across Contexts (Quantiles) +#' +#' Two-stage LD clumping tailored to quantile-regression screens: first, a +#' per-tau p-value-weighted clump; second, a MAF-weighted (or uninformed) +#' final clump over the per-tau union. Delegates the underlying bigsnpr +#' call to \code{pecotmr::ld_clump_by_score}. +#' +#' @param X Matrix of genotypes (n x p_sig) restricted to significant SNPs. +#' @param qr_results Output of \code{qr_screen()}; must contain +#' \code{sig.SNPs_names}, \code{tau_list}, \code{quantile_pvalue}, and +#' \code{sig_SNP_threshold}. +#' @param maf_list Optional per-SNP minor allele frequencies aligned to the +#' original (pre-screen) SNP ordering, used to score the final clump. +#' @param ld_clump_r2 r-squared threshold for per-tau clumping. Default 0.2. +#' @param final_clump_r2 r-squared threshold for the final MAF clump. +#' Default 0.8. +#' @return A list with \code{final_SNPs} and \code{clumped_SNPs}. #' @export multicontext_ld_clumping <- function(X, qr_results, maf_list = NULL, ld_clump_r2 = 0.2, final_clump_r2 = 0.8) { - # Extract significant SNP names sig_SNPs_names <- qr_results$sig.SNPs_names - # If no significant SNPs, return empty result if (length(sig_SNPs_names) == 0) { return(list(final_SNPs = NULL, clumped_SNPs = NULL, message = "No significant SNPs found")) } - # Check if X only contains one SNP (one column) if (ncol(X) == 1) { - print("Only one SNP in X. Skipping LD clumping.") + message("Only one SNP in X. Skipping LD clumping.") final_SNPs <- colnames(X) - return(list(final_SNPs = final_SNPs, clumped_SNPs = final_SNPs, message = "Only one SNP, no LD clumping performed")) + return(list(final_SNPs = final_SNPs, clumped_SNPs = final_SNPs, + message = "Only one SNP, no LD clumping performed")) } - # Extract genotype matrix for significant SNPs - G_all <- X # Only retain columns for significant SNPs - - # Convert the genotype matrix into FBM format - code_vec <- c(0, 1, 2, rep(NA, 256 - 3)) - G_all <- FBM.code256( - nrow = nrow(G_all), - ncol = ncol(G_all), - init = G_all, - code = code_vec - ) - - # Parse SNP names to extract chromosome and position information + # Parse chr/pos once. parsed_snp_info <- do.call(rbind, strsplit(sig_SNPs_names, ":")) chr <- as.numeric(sub("^chr", "", parsed_snp_info[, 1])) - pos <- as.numeric(parsed_snp_info[, 2]) # Extract position + pos <- as.numeric(parsed_snp_info[, 2]) - # Step 1: Perform LD clumping for each tau based on p-values + # Stage 1: per-tau clump by -log10(p). clumped_snp_list <- list() - for (itau in seq_along(qr_results$tau_list)) { tau_name <- paste0("p_qr_", qr_results$tau_list[itau]) - - # Extract p-values for the given quantile p_values_quantile <- qr_results$quantile_pvalue[, tau_name][qr_results$sig_SNP_threshold] - log_p_values <- -log10(p_values_quantile) # Calculate log10 p-values - - # Perform LD clumping using p-values as S - ind_clumped <- snp_clumping( - G = G_all, - infos.chr = chr, - infos.pos = pos, - S = log_p_values, # Use log10 p-values for each quantile - thr.r2 = ld_clump_r2, - size = 100 / ld_clump_r2 - ) # Window size of 500kb - - # Store clumping results for each quantile - clumped_snp_list[[tau_name]] <- ind_clumped + log_p_values <- -log10(p_values_quantile) + clumped_snp_list[[tau_name]] <- ld_clump_by_score( + X, score = log_p_values, chr = chr, pos = pos, r2 = ld_clump_r2 + ) } - # Step 2: Take the union of clumping results across all quantiles - clumped_snp_union <- unique(unlist(clumped_snp_list)) # This is the SNP index, not the name + clumped_snp_union <- unique(unlist(clumped_snp_list)) clumped_SNPs_name <- sig_SNPs_names[clumped_snp_union] - print(paste("Number of SNPs after union of clumping:", length(clumped_snp_union))) + message("Number of SNPs after union of clumping: ", length(clumped_snp_union)) if (length(clumped_snp_union) == 1) { - message("Only one SNP found in the union. Skipping LD pruning and returning the single SNP directly.") + message("Only one SNP found in the union. Skipping final LD clumping.") final_SNPs <- sig_SNPs_names[clumped_snp_union] return(list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name)) } - - # Step 3: Sort results from union + # Stage 2: final clump over the union, ordered by chr/pos, scored by MAF (or NULL). sorted_indices <- order(chr[clumped_snp_union], pos[clumped_snp_union]) chr_sorted <- chr[clumped_snp_union][sorted_indices] pos_sorted <- pos[clumped_snp_union][sorted_indices] - # Step 4: Initialize maf_values to NULL, and update only if maf_list is provided maf_values <- NULL if (!is.null(maf_list)) { maf_values <- maf_list[qr_results$sig_SNP_threshold][clumped_snp_union][sorted_indices] } G_union <- X[, clumped_snp_union, drop = FALSE][, sorted_indices] - if (!inherits(G_union, "FBM")) { - G_union <- FBM.code256( - nrow = nrow(G_union), - ncol = ncol(G_union), - init = G_union, - code = code_vec - ) - } - # Step 5: Perform final clumping using maf_values (if available), otherwise proceed with NULL S - final_clumped <- snp_clumping( - G = G_union, - infos.chr = chr_sorted, - infos.pos = pos_sorted, - S = maf_values, # Use MAF values if provided, otherwise NULL - thr.r2 = final_clump_r2, - size = 100 / final_clump_r2 - ) # Final clumping - - # Restrict the genotype matrix to only the final clumped SNPs - G_final_clumped <- G_union[, final_clumped, drop = FALSE] # Limit G to the final clumped SNPs - # Get the final SNP names + + final_clumped <- ld_clump_by_score( + G_union, score = maf_values, chr = chr_sorted, pos = pos_sorted, r2 = final_clump_r2 + ) + final_SNPs <- sig_SNPs_names[clumped_snp_union][sorted_indices][final_clumped] - print(paste("Number of final SNPs after MAF-based clumping (if applied):", length(final_SNPs))) + message("Number of final SNPs after MAF-based clumping (if applied): ", length(final_SNPs)) - return(list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name)) + list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name) } #' Perform Quantile Regression Analysis to get beta @@ -363,250 +325,6 @@ perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by return(result_table_wide) } -#' Filter Highly Correlated SNPs -#' @param X Matrix of genotypes -#' @param cor_thres Correlation threshold for filtering -#' @return A list containing filtered X matrix and filter IDs -corr_filter <- function(X, cor_thres = 0.8) { - p <- ncol(X) - - # Use Rfast for faster correlation calculation if available - if (requireNamespace("Rfast", quietly = TRUE)) { - cor.X <- Rfast::cora(X, large = TRUE) - } else { - cor.X <- cor(X) - } - Sigma.distance <- as.dist(1 - abs(cor.X)) - fit <- hclust(Sigma.distance, method = "single") - clusters <- cutree(fit, h = 1 - cor_thres) - groups <- unique(clusters) - ind.delete <- NULL - X.new <- X - filter.id <- c(1:p) - for (ig in seq_along(groups)) { - temp.group <- which(clusters == groups[ig]) - if (length(temp.group) > 1) { - ind.delete <- c(ind.delete, temp.group[-1]) - } - } - ind.delete <- unique(ind.delete) - if ((length(ind.delete)) > 0) { - X.new <- as.matrix(X[, -ind.delete]) - filter.id <- filter.id[-ind.delete] - } - - # Check if X.new has only one column and ensure column names are preserved - if (ncol(X.new) == 1) { - colnames(X.new) <- colnames(X)[-ind.delete] - } - - return(list(X.new = X.new, filter.id = filter.id)) -} - - - -#' Check and Remove Problematic Columns to Ensure Full Rank -#' -#' This function checks for problematic columns in the design matrix that cause it to be not full rank, -#' and iteratively removes them based on the chosen strategy until the matrix is full rank. -#' -#' @param X Matrix of SNPs -#' @param C Matrix of covariates (unnamed) -#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation") -#' @param response Optional response vector for "response_correlation" strategy -#' @param max_iterations Maximum number of iterations to attempt removing problematic columns -#' @return Cleaned matrix X with problematic columns removed -#' @noRd -check_remove_highcorr_snp <- function(X = X, C = C, strategy = c("correlation", "variance", "response_correlation"), response = NULL, max_iterations = 300, corr_thresholds = seq(0.75, 0.5, by = -0.05)) { - strategy <- match.arg(strategy) - original_colnames <- colnames(X) - initial_ncol <- ncol(X) # Store the initial number of columns in X - iteration <- 0 - # Combine the design matrix with X (SNPs) and C (covariates), keeping C without column names - X_design <- cbind(1, X, C) # Add an intercept column (1) - colnames_X_design <- c("Intercept", colnames(X)) # Assign column names only to X (SNPs) part - - # Assign column names only to the X part, leaving C without names - colnames(X_design)[1:(length(colnames_X_design))] <- colnames_X_design - - # Check the initial rank of the design matrix - matrix_rank <- qr(X_design)$rank - message("Initial rank of the design matrix: ", matrix_rank, " / ", ncol(X_design), " columns.") - - # Skip remove_highcorr_snp if removing all problematic columns doesn't achieve full rank - skip_remove_highcorr <- FALSE - - # First check: Try removing all problematic columns at once - if (matrix_rank < ncol(X_design)) { - message("Design matrix is not full rank, identifying all problematic columns...") - - # QR decomposition to identify linearly dependent columns - qr_decomp <- qr(X_design) - R <- qr_decomp$rank - Q <- qr_decomp$pivot - - # Get all problematic columns - problematic_cols <- Q[(R + 1):ncol(X_design)] - problematic_colnames <- colnames(X_design)[problematic_cols] - problematic_colnames <- problematic_colnames[problematic_colnames %in% colnames(X)] - - if (length(problematic_colnames) > 0) { - message("Attempting to remove all problematic columns at once: ", paste(problematic_colnames, collapse = ", ")) - - # Remove all problematic columns at once - X_temp <- X[, !(colnames(X) %in% problematic_colnames), drop = FALSE] - X_design_temp <- cbind(1, X_temp, C) - colnames_X_design_temp <- c("Intercept", colnames(X_temp)) - colnames(X_design_temp)[1:length(colnames_X_design_temp)] <- colnames_X_design_temp - - # Check if removing all problematic columns achieves full rank - matrix_rank_temp <- qr(X_design_temp)$rank - - if (matrix_rank_temp == ncol(X_design_temp)) { - message("Achieved full rank by removing all problematic columns at once. Proceeding with original logic...") - } else { - message("Removing all problematic columns did not achieve full rank. Skipping to corr_filter...") - skip_remove_highcorr <- TRUE - } - } - } - - # Only proceed with remove_highcorr_snp if not skipping - if (!skip_remove_highcorr) { - while (matrix_rank < ncol(X_design) && iteration < max_iterations) { - message("Design matrix is not full rank, identifying problematic columns...") - - qr_decomp <- qr(X_design) - R <- qr_decomp$rank - Q <- qr_decomp$pivot - problematic_cols <- Q[(R + 1):ncol(X_design)] - problematic_colnames <- colnames(X_design)[problematic_cols] - problematic_colnames <- problematic_colnames[problematic_colnames %in% colnames(X)] - - if (length(problematic_colnames) == 0) { - message("No more problematic SNP columns found in X. Breaking the loop.") - break - } - - message("Problematic SNP columns identified: ", paste(problematic_colnames, collapse = ", ")) - X <- remove_highcorr_snp(X, problematic_colnames, strategy = strategy, response = response) - - X_design <- cbind(1, X, C) - colnames_X_design <- c("Intercept", colnames(X)) - colnames(X_design)[1:length(colnames_X_design)] <- colnames_X_design - - matrix_rank <- qr(X_design)$rank - message("New rank of the design matrix: ", matrix_rank, " / ", ncol(X_design), " columns.") - iteration <- iteration + 1 - } - - if (iteration == max_iterations) { - warning("Maximum iterations reached. The design matrix may still not be full rank.") - } - } - - # Final check and corr_filter if needed - matrix_rank <- qr(cbind(1, X, C))$rank - if (matrix_rank < ncol(cbind(1, X, C))) { - message("Applying corr_filter to ensure the design matrix is full rank...") - for (threshold in corr_thresholds) { - filter_result <- corr_filter(X, cor_thres = threshold) - X <- filter_result$X.new - X_design <- cbind(1, X, C) - colnames_X_design <- c("Intercept", colnames(X)) - colnames(X_design)[1:length(colnames_X_design)] <- colnames_X_design - - matrix_rank <- qr(X_design)$rank - message("Rank after corr_filter with threshold ", threshold, ": ", matrix_rank, " / ", ncol(X_design), " columns.") - if (matrix_rank == ncol(X_design)) { - break - } - } - } - - - if (iteration == max_iterations) { - warning("Maximum iterations reached. The design matrix may still not be full rank.") - } - - if (ncol(X) == 1 && initial_ncol == 1) { - colnames(X) <- original_colnames - } - return(X) -} - -#' Remove Problematic Columns Based on a Given Strategy -#' -#' This function removes problematic columns from a matrix based on different strategies, such as smallest variance, -#' highest correlation, or lowest correlation with the response variable. -#' -#' @param X Matrix of SNPs -#' @param problematic_cols A vector of problematic columns to be removed -#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation") -#' @param response Optional response vector for "response_correlation" strategy -#' @return Cleaned matrix X with the selected column removed -#' @importFrom stats var cor -#' @noRd -remove_highcorr_snp <- function(X, problematic_cols, strategy = c("correlation", "variance", "response_correlation"), response = NULL) { - # Set default strategy - strategy <- match.arg(strategy) - - message("Identified problematic columns: ", paste(problematic_cols, collapse = ", ")) - - if (length(problematic_cols) == 0) { - return(X) # If there are no problematic columns, return as is - } - - if (length(problematic_cols) == 1) { - message("Only one problematic column: ", problematic_cols) - col_to_remove <- problematic_cols[1] - message("Removing column: ", col_to_remove) - X <- X[, !(colnames(X) %in% col_to_remove), drop = FALSE] - # If X only has one column left after removal, ensure its column name is preserved - if (ncol(X) == 1) { - colnames(X) <- colnames(X)[colnames(X) != col_to_remove] # Preserve remaining SNP name - } - return(X) - } - - # Choose columns to remove based on the strategy - if (strategy == "variance") { - # Strategy 1: Remove the column with the smallest variance - variances <- apply(X[, problematic_cols, drop = FALSE], 2, var) - col_to_remove <- problematic_cols[which.min(variances)] - message("Removing column with the smallest variance: ", col_to_remove) - } else if (strategy == "correlation") { - # Strategy 2: Remove the column with the highest sum of absolute correlations - cor_matrix <- abs(cor(X[, problematic_cols, drop = FALSE])) # Calculate absolute correlation matrix - diag(cor_matrix) <- 0 # Ignore the diagonal (self-correlation) - - if (length(problematic_cols) == 2) { - # If there are only two problematic columns, randomly remove one - col_to_remove <- sample(problematic_cols, 1) - message("Only two problematic columns, randomly removing: ", col_to_remove) - } else { - # Calculate sum of absolute correlations for each column - cor_sums <- colSums(cor_matrix) - col_to_remove <- problematic_cols[which.max(cor_sums)] # Remove the column with the largest sum of correlations - message("Removing column with highest sum of absolute correlations: ", col_to_remove) - } - } else if (strategy == "response_correlation" && !is.null(response)) { - # Strategy 3: Remove the column with the lowest correlation with the response variable - # FIXME: This strategy is potentially biased based on corr of response and variants - cor_with_response <- apply(X[, problematic_cols, drop = FALSE], 2, function(col) cor(col, response)) - col_to_remove <- problematic_cols[which.min(abs(cor_with_response))] - message("Removing column with lowest correlation with the response: ", col_to_remove) - } else { - stop("Invalid strategy or missing response variable for 'response_correlation' strategy.") - } - - # Remove the selected column from X - X <- X[, !(colnames(X) %in% col_to_remove), drop = FALSE] - if (ncol(X) == 1) { - colnames(X) <- colnames(X)[colnames(X) != col_to_remove] # Preserve remaining SNP name - } - return(X) -} #' Calculate QR Coefficients and Pseudo R-squared Across Multiple Quantiles #' @@ -626,7 +344,7 @@ calculate_qr_and_pseudo_R2 <- function(AssocData, tau.list, strategy = c("correl } strategy <- match.arg(strategy) # Check and handle problematic columns affecting the full rank of the design matrix - AssocData$X.filter <- check_remove_highcorr_snp(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y, corr_thresholds = corr_thresholds) + AssocData$X.filter <- enforce_design_full_rank(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y, corr_thresholds = corr_thresholds) snp_names <- colnames(AssocData$X.filter) # Build the cleaned design matrix using the filtered X and unnamed C @@ -1030,7 +748,7 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id # Step 8: Initial filter of highly correlated SNPs message("Filtering highly correlated SNPs...") if (ncol(X_for_corr) > 1) { - filtered <- corr_filter(X_for_corr, initial_corr_filter_cutoff) + filtered <- ld_prune_by_correlation(X_for_corr, initial_corr_filter_cutoff) X.filter <- filtered$X.new } else { X.filter <- X_for_corr diff --git a/R/zzz.R b/R/zzz.R index e1521d9..32d4097 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -3,15 +3,13 @@ # bare names (no `pecotmr:::` prefix) without triggering the R CMD check # warning "':::' calls which should be '::'". -compute_qvalues <- NULL -pval_cauchy <- NULL +compute_qvalues <- NULL +pval_cauchy <- NULL +drop_collinear_columns <- NULL .onLoad <- function(libname, pkgname) { ns_self <- asNamespace(pkgname) - assign("compute_qvalues", - utils::getFromNamespace("compute_qvalues", "pecotmr"), - envir = ns_self) - assign("pval_cauchy", - utils::getFromNamespace("pval_cauchy", "pecotmr"), - envir = ns_self) + for (sym in c("compute_qvalues", "pval_cauchy", "drop_collinear_columns")) { + assign(sym, utils::getFromNamespace(sym, "pecotmr"), envir = ns_self) + } } diff --git a/man/corr_filter.Rd b/man/corr_filter.Rd deleted file mode 100644 index d2f8307..0000000 --- a/man/corr_filter.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quantile_twas_weight.R -\name{corr_filter} -\alias{corr_filter} -\title{Filter Highly Correlated SNPs} -\usage{ -corr_filter(X, cor_thres = 0.8) -} -\arguments{ -\item{X}{Matrix of genotypes} - -\item{cor_thres}{Correlation threshold for filtering} -} -\value{ -A list containing filtered X matrix and filter IDs -} -\description{ -Filter Highly Correlated SNPs -} diff --git a/man/multicontext_ld_clumping.Rd b/man/multicontext_ld_clumping.Rd index 6d0cbf0..2cd3595 100644 --- a/man/multicontext_ld_clumping.Rd +++ b/man/multicontext_ld_clumping.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/quantile_twas_weight.R \name{multicontext_ld_clumping} \alias{multicontext_ld_clumping} -\title{Perform Clumping and Pruning} +\title{Perform Clumping and Pruning Across Contexts (Quantiles)} \usage{ multicontext_ld_clumping( X, @@ -13,19 +13,26 @@ multicontext_ld_clumping( ) } \arguments{ -\item{X}{Matrix of genotypes} +\item{X}{Matrix of genotypes (n x p_sig) restricted to significant SNPs.} -\item{qr_results}{Results from QR_screen} +\item{qr_results}{Output of \code{qr_screen()}; must contain +\code{sig.SNPs_names}, \code{tau_list}, \code{quantile_pvalue}, and +\code{sig_SNP_threshold}.} -\item{maf_list}{List of minor allele frequencies (optional)} +\item{maf_list}{Optional per-SNP minor allele frequencies aligned to the +original (pre-screen) SNP ordering, used to score the final clump.} -\item{ld_clump_r2}{R-squared threshold for initial LD clumping based on pvalue} +\item{ld_clump_r2}{r-squared threshold for per-tau clumping. Default 0.2.} -\item{final_clump_r2}{R-squared threshold for final LD clumping based on MAF} +\item{final_clump_r2}{r-squared threshold for the final MAF clump. +Default 0.8.} } \value{ -A list containing final SNPs and clumped SNPs +A list with \code{final_SNPs} and \code{clumped_SNPs}. } \description{ -Perform Clumping and Pruning +Two-stage LD clumping tailored to quantile-regression screens: first, a +per-tau p-value-weighted clump; second, a MAF-weighted (or uninformed) +final clump over the per-tau union. Delegates the underlying bigsnpr +call to \code{pecotmr::ld_clump_by_score}. } diff --git a/tests/testthat/test_quantile_twas.R b/tests/testthat/test_quantile_twas.R index 1d4c4cf..18438d9 100644 --- a/tests/testthat/test_quantile_twas.R +++ b/tests/testthat/test_quantile_twas.R @@ -182,196 +182,6 @@ test_that("perform_grouped_integration single variant skips clustering", { expect_equal(ncol(result$weights), 3) }) -# =========================================================================== -# corr_filter (internal) -# =========================================================================== - -test_that("corr_filter removes highly correlated columns", { - set.seed(42) - n <- 50; p <- 10 - X <- matrix(rnorm(n * p), nrow = n) - colnames(X) <- paste0("v", 1:p) - X[, 2] <- X[, 1] + rnorm(n, sd = 0.01) - result <- qQTLR:::corr_filter(X, cor_thres = 0.9) - expect_true(ncol(result$X.new) < p) - expect_true(length(result$filter.id) == ncol(result$X.new)) -}) - -test_that("corr_filter keeps all columns when uncorrelated", { - set.seed(42) - n <- 100; p <- 5 - X <- matrix(rnorm(n * p), nrow = n) - colnames(X) <- paste0("v", 1:p) - result <- qQTLR:::corr_filter(X, cor_thres = 0.99) - expect_equal(ncol(result$X.new), p) - expect_equal(result$filter.id, 1:p) -}) - -test_that("corr_filter preserves colnames for single remaining column", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n * 3), nrow = n) - colnames(X) <- c("a", "b", "c") - X[, 2] <- X[, 1] + rnorm(n, sd = 0.001) - X[, 3] <- X[, 1] + rnorm(n, sd = 0.001) - result <- qQTLR:::corr_filter(X, cor_thres = 0.5) - expect_true(ncol(result$X.new) >= 1) - expect_true(!is.null(colnames(result$X.new))) -}) - -test_that("corr_filter handles single-column input", { - set.seed(42) - n <- 30 - X <- matrix(rnorm(n), nrow = n, ncol = 1) - colnames(X) <- "v1" - expect_error(qQTLR:::corr_filter(X, cor_thres = 0.8)) -}) - -test_that("corr_filter with low threshold removes more columns", { - set.seed(42) - n <- 100; p <- 5 - X <- matrix(rnorm(n * p), nrow = n) - colnames(X) <- paste0("v", 1:p) - X[, 2] <- X[, 1] + rnorm(n, sd = 0.1) - X[, 3] <- X[, 1] + rnorm(n, sd = 0.1) - X[, 5] <- X[, 4] + rnorm(n, sd = 0.1) - result_strict <- qQTLR:::corr_filter(X, cor_thres = 0.3) - result_lenient <- qQTLR:::corr_filter(X, cor_thres = 0.99) - expect_true(ncol(result_strict$X.new) <= ncol(result_lenient$X.new)) -}) - -test_that("corr_filter preserves colnames when no columns deleted", { - set.seed(42) - n <- 100; p <- 3 - X <- matrix(rnorm(n * p), nrow = n) - colnames(X) <- c("snp_a", "snp_b", "snp_c") - result <- qQTLR:::corr_filter(X, cor_thres = 0.999) - expect_equal(colnames(result$X.new), colnames(X)) -}) - -# =========================================================================== -# remove_highcorr_snp (internal) -# =========================================================================== - -test_that("remove_highcorr_snp returns X unchanged when no problematic columns", { - X <- matrix(rnorm(100), nrow = 20, ncol = 5) - colnames(X) <- paste0("v", 1:5) - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = character(0), strategy = "correlation") - expect_equal(ncol(result), 5) -}) - -test_that("remove_highcorr_snp removes single problematic column", { - X <- matrix(rnorm(100), nrow = 20, ncol = 5) - colnames(X) <- paste0("v", 1:5) - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = "v3", strategy = "correlation") - expect_equal(ncol(result), 4) - expect_false("v3" %in% colnames(result)) -}) - -test_that("remove_highcorr_snp variance strategy removes lowest variance column", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n * 3), nrow = n, ncol = 3) - colnames(X) <- c("low_var", "mid_var", "high_var") - X[, 1] <- X[, 1] * 0.01 - X[, 3] <- X[, 3] * 10 - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("low_var", "mid_var", "high_var"), - strategy = "variance") - expect_equal(ncol(result), 2) - expect_false("low_var" %in% colnames(result)) -}) - -test_that("remove_highcorr_snp correlation strategy with two columns removes one randomly", { - set.seed(42) - X <- matrix(rnorm(100), nrow = 20, ncol = 5) - colnames(X) <- paste0("v", 1:5) - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2"), strategy = "correlation") - expect_equal(ncol(result), 4) - expect_true(xor("v1" %in% colnames(result), "v2" %in% colnames(result)) || - (!("v1" %in% colnames(result)) && !("v2" %in% colnames(result))) == FALSE) -}) - -test_that("remove_highcorr_snp correlation strategy with 3+ cols removes highest sum", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n * 4), nrow = n, ncol = 4) - colnames(X) <- paste0("v", 1:4) - X[, 2] <- X[, 1] + rnorm(n, sd = 0.01) - X[, 3] <- X[, 1] + rnorm(n, sd = 0.01) - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2", "v3"), - strategy = "correlation") - expect_equal(ncol(result), 3) -}) - -test_that("remove_highcorr_snp response_correlation strategy removes lowest response corr", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n * 3), nrow = n, ncol = 3) - colnames(X) <- c("v1", "v2", "v3") - response <- X[, 1] * 2 + rnorm(n, sd = 0.1) - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2", "v3"), - strategy = "response_correlation", - response = response) - expect_equal(ncol(result), 2) - expect_true("v1" %in% colnames(result)) -}) - -test_that("remove_highcorr_snp errors on invalid strategy", { - X <- matrix(rnorm(100), nrow = 20, ncol = 5) - colnames(X) <- paste0("v", 1:5) - expect_error( - qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2"), - strategy = "invalid_strategy"), - "arg" - ) -}) - -test_that("remove_highcorr_snp preserves column name when single column remains", { - set.seed(42) - X <- matrix(rnorm(40), nrow = 20, ncol = 2) - colnames(X) <- c("keeper", "removed") - result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = "removed", strategy = "correlation") - expect_equal(ncol(result), 1) - expect_equal(colnames(result), "keeper") -}) - -# =========================================================================== -# check_remove_highcorr_snp (internal) -# =========================================================================== - -test_that("check_remove_highcorr_snp returns full-rank matrix when already full rank", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n * 3), nrow = n, ncol = 3) - colnames(X) <- paste0("v", 1:3) - C <- matrix(rnorm(n * 2), nrow = n, ncol = 2) - result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation") - expect_true(is.matrix(result)) - expect_true(ncol(result) >= 1) -}) - -test_that("check_remove_highcorr_snp handles rank-deficient design via corr_filter fallback", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n * 4), nrow = n, ncol = 4) - colnames(X) <- paste0("v", 1:4) - X[, 4] <- X[, 1] + X[, 2] - C <- NULL - result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation") - design <- cbind(1, result) - expect_equal(qr(design)$rank, ncol(design)) -}) - -test_that("check_remove_highcorr_snp preserves colname for single-column input", { - set.seed(42) - n <- 50 - X <- matrix(rnorm(n), nrow = n, ncol = 1) - colnames(X) <- "only_snp" - C <- NULL - result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation") - expect_equal(colnames(result), "only_snp") -}) - # =========================================================================== # calculate_coef_heterogeneity (internal) # =========================================================================== @@ -798,14 +608,14 @@ test_that("get_modularity handles all-negative weights", { }) # =========================================================================== -# check_remove_highcorr_snp corr_filter fallback (internal) +# enforce_design_full_rank integration (imported from pecotmr) # =========================================================================== -test_that("check_remove_highcorr_snp triggers corr_filter fallback for stubborn rank deficiency", { +test_that("enforce_design_full_rank triggers correlation-prune fallback for stubborn rank deficiency", { set.seed(42) n <- 50 - # Create a matrix where columns are highly correlated but not identical - # so QR removal won't fully fix the rank deficiency + # Columns highly correlated but not identical so QR removal won't + # fully fix the rank deficiency on its own. base <- rnorm(n) X <- cbind( base + rnorm(n, sd = 0.001), @@ -814,28 +624,22 @@ test_that("check_remove_highcorr_snp triggers corr_filter fallback for stubborn rnorm(n) ) colnames(X) <- paste0("v", 1:4) - # Covariate that is also correlated with a column C <- matrix(X[, 4] + rnorm(n, sd = 0.001), ncol = 1) - result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation") + result <- enforce_design_full_rank(X = X, C = C, strategy = "correlation") design <- cbind(1, result, C) expect_equal(qr(design)$rank, ncol(design)) }) -test_that("check_remove_highcorr_snp max_iterations warning path", { +test_that("enforce_design_full_rank max_iterations warning path", { set.seed(42) n <- 50 - # Create a rank-deficient matrix so the while loop enters - # but max_iterations = 1 limits iterations X <- matrix(rnorm(n * 4), nrow = n, ncol = 4) colnames(X) <- paste0("v", 1:4) - X[, 3] <- X[, 1] + X[, 2] # linearly dependent - X[, 4] <- X[, 1] - X[, 2] # another dependency - C <- NULL - # With max_iterations = 1, it may not fully resolve rank deficiency, - # triggering the corr_filter fallback + X[, 3] <- X[, 1] + X[, 2] + X[, 4] <- X[, 1] - X[, 2] expect_warning( - qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation", max_iterations = 1), - "Maximum iterations reached" + enforce_design_full_rank(X = X, C = NULL, strategy = "correlation", max_iterations = 1), + "max_iterations reached" ) })