From bb9a52c087d953a492282f9fc36b266d3d59e1b3 Mon Sep 17 00:00:00 2001
From: gw <wangow@gmail.com>
Date: Sat, 18 Apr 2026 07:04:05 -0400
Subject: [PATCH 1/2] Properly import pecotmr functions

---
 DESCRIPTION                |   1 +
 NAMESPACE                  |   7 +++
 R/qQTLR-package.R          |  10 ++++
 R/quail_vqtl.R             | 107 ++++++++++---------------------------
 R/quantile_twas.R          |   2 +-
 R/quantile_twas_pipeline.R |   8 +--
 R/quantile_twas_weight.R   |  12 ++---
 R/zzz.R                    |  17 ++++++
 man/qQTLR-package.Rd       |  29 ++++++++++
 9 files changed, 103 insertions(+), 90 deletions(-)
 create mode 100644 R/qQTLR-package.R
 create mode 100644 R/zzz.R
 create mode 100644 man/qQTLR-package.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index de0b4ef..5bf3335 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -18,6 +18,7 @@ Imports:
     pecotmr,
     readr,
     stats,
+    susieR,
     tidyr,
     vroom
 Suggests:
diff --git a/NAMESPACE b/NAMESPACE
index 1825714..1fb6b97 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,12 @@ importFrom(dplyr,"%>%")
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(parallel,mclapply)
+importFrom(pecotmr,clean_context_names)
+importFrom(pecotmr,filter_variants_by_ld_reference)
+importFrom(pecotmr,find_data)
+importFrom(pecotmr,harmonize_twas)
+importFrom(pecotmr,parse_variant_id)
+importFrom(pecotmr,twas_analysis)
 importFrom(stats,as.dist)
 importFrom(stats,cor)
 importFrom(stats,cov)
@@ -22,5 +28,6 @@ importFrom(stats,cutree)
 importFrom(stats,hclust)
 importFrom(stats,rnorm)
 importFrom(stats,var)
+importFrom(susieR,univariate_regression)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,separate)
diff --git a/R/qQTLR-package.R b/R/qQTLR-package.R
new file mode 100644
index 0000000..2ced6b5
--- /dev/null
+++ b/R/qQTLR-package.R
@@ -0,0 +1,10 @@
+#' qQTLR: Quantile QTL Analysis
+#'
+#' Tools for quantile QTL analysis, including QUAIL vQTL mapping,
+#' quantile TWAS weight training, and quantile TWAS pipelines.
+#'
+#' @importFrom pecotmr parse_variant_id harmonize_twas twas_analysis find_data
+#'   clean_context_names filter_variants_by_ld_reference
+#' @importFrom susieR univariate_regression
+#' @keywords internal
+"_PACKAGE"
diff --git a/R/quail_vqtl.R b/R/quail_vqtl.R
index 6df9c41..524841a 100644
--- a/R/quail_vqtl.R
+++ b/R/quail_vqtl.R
@@ -1,21 +1,17 @@
-#' Perform Univariate Regression for Genotype Data
+#' Univariate regression with z/p/q augmentation
 #'
-#' @param X Numeric matrix of genotypes (n x p), where n is the number of samples and p is the number of SNPs.
+#' Thin wrapper around \code{susieR::univariate_regression} that strips rows
+#' with NA in \code{y}, then augments the returned list with z-scores,
+#' p-values, q-values, and names derived from \code{colnames(X)}.
+#'
+#' @param X Numeric matrix of genotypes (n x p).
 #' @param y Numeric vector of phenotypes (length n).
-#' @param Z Optional numeric matrix of covariates (n x k), where k is the number of covariates.
-#' @param center Logical, whether to center the data (default: TRUE).
-#' @param scale Logical, whether to scale the data (default: FALSE).
+#' @param Z Optional numeric matrix of covariates (n x k).
+#' @param center Logical, whether to center (default: TRUE).
+#' @param scale Logical, whether to scale (default: FALSE).
 #' @param return_residuals Logical, whether to return residuals (default: FALSE).
-#' @return A list containing regression results: \code{betahat}, \code{sebetahat}, \code{z_scores}, \code{p_values}, and \code{q_values}.
-#' @examples
-#' X <- matrix(rnorm(1000), 100, 10)
-#' y <- rnorm(100)
-#' results <- univariate_regression(X, y)
-#' @noRd
-calc_stderr <- function(X, residuals) {
-  sqrt(diag(sum(residuals^2) / (nrow(X) - 2) * chol2inv(chol(crossprod(X)))))
-}
-
+#' @return A list with \code{betahat}, \code{sebetahat}, \code{z_scores},
+#'   \code{p_values}, \code{q_values}, and optionally \code{residuals}.
 #' @noRd
 univariate_regression <- function(X, y, Z = NULL, center = TRUE,
                                   scale = FALSE, return_residuals = FALSE) {
@@ -23,73 +19,26 @@ univariate_regression <- function(X, y, Z = NULL, center = TRUE,
   if (length(y_na)) {
     X <- X[-y_na, ]
     y <- y[-y_na]
+    if (!is.null(Z)) Z <- Z[-y_na, , drop = FALSE]
   }
 
-  if (center) {
-    y <- y - mean(y)
-    X <- scale(X, center = TRUE, scale = scale)
-  } else {
-    X <- scale(X, center = FALSE, scale = scale)
-  }
-
-  X[is.nan(X)] <- 0
-
-  if (!is.null(Z)) {
-    if (center) {
-      Z <- scale(Z, center = TRUE, scale = scale)
-    }
-    residual_x <- X # Store X before modifying y
-    y <- .lm.fit(Z, y)$residuals
-  } else {
-    residual_x <- X
-  }
-
-  output <- try(do.call(
-    rbind,
-    lapply(1:ncol(X), function(i) {
-      g <- .lm.fit(cbind(1, X[, i]), y)
-      return(c(coef(g)[2], calc_stderr(cbind(1, X[, i]), g$residuals)[2]))
-    })
-  ), silent = TRUE)
-
-  # Exception handling
-  if (inherits(output, "try-error")) {
-    output <- matrix(0, ncol(X), 2)
-    for (i in 1:ncol(X)) {
-      fit <- summary(lm(y ~ X[, i]))$coef
-      if (nrow(fit) == 2) {
-        output[i, ] <- as.vector(summary(lm(y ~ X[, i]))$coef[2, 1:2])
-      } else {
-        output[i, ] <- c(0, 0)
-      }
-    }
-  }
-
-  # Calculate z-scores (t-statistics)
-  z_scores <- output[, 1] / output[, 2]
-
-  # Calculate p-values from z-scores
-  p_values <- 2 * pnorm(abs(z_scores), lower.tail = FALSE)
-
-  # Calculate q-values using the provided function
-  q_values <- pecotmr:::compute_qvalues(p_values)
-
-  # Use residual_x's column names as the column names for the results
-  rownames(output) <- colnames(residual_x)
-
-  result_list <- list(
-    betahat = setNames(output[, 1], rownames(output)),
-    sebetahat = setNames(output[, 2], rownames(output)),
-    z_scores = setNames(z_scores, rownames(output)),
-    p_values = setNames(p_values, rownames(output)),
-    q_values = setNames(q_values, rownames(output))
+  nm <- colnames(X)
+  result <- susieR::univariate_regression(
+    X, y,
+    Z = Z, center = center, scale = scale,
+    return_residuals = return_residuals
   )
 
-  if (return_residuals && !is.null(Z)) {
-    result_list$residuals <- y
-  }
-
-  return(result_list)
+  z_scores <- result$betahat / result$sebetahat
+  p_values <- 2 * pnorm(abs(z_scores), lower.tail = FALSE)
+  q_values <- compute_qvalues(p_values)
+
+  result$betahat   <- setNames(result$betahat,   nm)
+  result$sebetahat <- setNames(result$sebetahat, nm)
+  result$z_scores  <- setNames(z_scores, nm)
+  result$p_values  <- setNames(p_values, nm)
+  result$q_values  <- setNames(q_values, nm)
+  result
 }
 
 
@@ -117,7 +66,7 @@ run_linear_regression <- function(genotype, phenotype, covariates = NULL, phenot
     scale = FALSE
   )
 
-  snp_info <- pecotmr::parse_variant_id(colnames(genotype))
+  snp_info <- parse_variant_id(colnames(genotype))
 
   data.frame(
     phenotype_id = if (!is.null(phenotype_id)) phenotype_id else NA,
diff --git a/R/quantile_twas.R b/R/quantile_twas.R
index 088ce22..4765f83 100644
--- a/R/quantile_twas.R
+++ b/R/quantile_twas.R
@@ -377,7 +377,7 @@ load_quantile_twas_weights <- function(weight_db_files, tau_values = seq(0.01, 0
           if (!is.null(context_data$twas_weight) && !is.null(context_data$pseudo_R2)) {
             list(
               gene = gene,
-              context = pecotmr::clean_context_names(context, gene),
+              context = clean_context_names(context, gene),
               data = context_data
             )
           } else {
diff --git a/R/quantile_twas_pipeline.R b/R/quantile_twas_pipeline.R
index 6d79a17..2b2cfe6 100644
--- a/R/quantile_twas_pipeline.R
+++ b/R/quantile_twas_pipeline.R
@@ -165,7 +165,7 @@ quantile_twas_pipeline <- function(twas_weights_data,
   }
 
   # Step 1: Harmonize TWAS data
-  twas_data_qced_result <- pecotmr::harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file,
+  twas_data_qced_result <- harmonize_twas(twas_weights_data, ld_meta_file_path, gwas_meta_file,
                                                     ld_reference_sample_size = ld_reference_sample_size,
                                                     column_file_path = column_file_path, comment_string = comment_string)
   twas_results_db <- lapply(names(twas_weights_data), function(weight_db) {
@@ -202,7 +202,7 @@ quantile_twas_pipeline <- function(twas_weights_data,
         if (length(twas_variants) == 0) {
           return(list(twas_rs_df = data.frame()))
         }
-        twas_rs <- pecotmr::twas_analysis(
+        twas_rs <- twas_analysis(
           twas_data_qced[[weight_db]][["weights_qced"]][[context]][[study]][["weights"]],
           twas_data_qced[[weight_db]][["gwas_qced"]][[study]],
           twas_data_qced[[weight_db]][["LD"]], twas_variants
@@ -212,8 +212,8 @@ quantile_twas_pipeline <- function(twas_weights_data,
         }
         twas_rs_df <- data.frame(
           gwas_study = study, method = sub("_[^_]+$", "", names(twas_rs)),
-          twas_z = pecotmr::find_data(twas_rs, c(2, "z")),
-          twas_pval = pecotmr::find_data(twas_rs, c(2, "pval")),
+          twas_z = find_data(twas_rs, c(2, "z")),
+          twas_pval = find_data(twas_rs, c(2, "pval")),
           context = context, molecular_id = weight_db
         )
         return(list(twas_rs_df = twas_rs_df))
diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R
index 29dbb66..c33fdfd 100644
--- a/R/quantile_twas_weight.R
+++ b/R/quantile_twas_weight.R
@@ -82,7 +82,7 @@ qr_screen <- function(
     pvec[ip] <- pvalue
   }
 
-  pvec <- apply(quantile.pvalue, 1, pecotmr:::pval_cauchy)
+  pvec <- apply(quantile.pvalue, 1, pval_cauchy)
 
   if (screen_method == "fdr") {
     adjusted_pvalues <- p.adjust(pvec)
@@ -90,10 +90,10 @@ qr_screen <- function(
     method_quantile_names <- paste0("fdr_p_qr_", tau.list)
     quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, p.adjust)
   } else if (screen_method == "qvalue") {
-    adjusted_pvalues <- pecotmr:::compute_qvalues(pvec)
+    adjusted_pvalues <- compute_qvalues(pvec)
     method_col_name <- "qvalue_qr"
     method_quantile_names <- paste0("qvalue_qr_", tau.list)
-    quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, pecotmr:::compute_qvalues)
+    quantile_adjusted_pvalues <- apply(quantile.pvalue, 2, compute_qvalues)
   } else {
     stop("Invalid screen_method. Choose 'fdr' or 'qvalue'.")
   }
@@ -138,7 +138,7 @@ qr_screen <- function(
   }
 
   # Split variant_id and reorder columns
-  parsed <- pecotmr::parse_variant_id(df_result$variant_id)
+  parsed <- parse_variant_id(df_result$variant_id)
   df_result <- df_result %>%
     mutate(chr = parsed$chrom, pos = parsed$pos, A2 = parsed$A2, A1 = parsed$A1)
 
@@ -354,7 +354,7 @@ perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by
       values_from = predictor_coef,
       names_prefix = "coef_qr_"
     )
-  parsed_ids <- pecotmr::parse_variant_id(result_table_wide$variant_id)
+  parsed_ids <- parse_variant_id(result_table_wide$variant_id)
   result_table_wide <- result_table_wide %>%
     mutate(chr = parsed_ids$chrom, pos = parsed_ids$pos, A2 = parsed_ids$A2, A1 = parsed_ids$A1) %>%
     select(chr, pos, A2, A1, everything())
@@ -959,7 +959,7 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id
     message("Starting LD panel filtering...")
     ld_result <- tryCatch(
       {
-        variants_kept <- pecotmr::filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file)
+        variants_kept <- filter_variants_by_ld_reference(colnames(X_filtered), ld_reference_meta_file)
         if (length(variants_kept$data) == 0) NULL else variants_kept
       },
       error = function(e) {
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..e1521d9
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,17 @@
+# Lazy-bind unexported pecotmr helpers into qQTLR's namespace.
+# Resolving at load time via utils::getFromNamespace means call sites can use
+# bare names (no `pecotmr:::` prefix) without triggering the R CMD check
+# warning "':::' calls which should be '::'".
+
+compute_qvalues <- NULL
+pval_cauchy     <- NULL
+
+.onLoad <- function(libname, pkgname) {
+  ns_self <- asNamespace(pkgname)
+  assign("compute_qvalues",
+         utils::getFromNamespace("compute_qvalues", "pecotmr"),
+         envir = ns_self)
+  assign("pval_cauchy",
+         utils::getFromNamespace("pval_cauchy", "pecotmr"),
+         envir = ns_self)
+}
diff --git a/man/qQTLR-package.Rd b/man/qQTLR-package.Rd
new file mode 100644
index 0000000..6f1eecc
--- /dev/null
+++ b/man/qQTLR-package.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/qQTLR-package.R
+\docType{package}
+\name{qQTLR-package}
+\alias{qQTLR}
+\alias{qQTLR-package}
+\title{qQTLR: Quantile QTL Analysis}
+\description{
+Tools for quantile QTL analysis, including QUAIL vQTL mapping,
+quantile TWAS weight training, and quantile TWAS pipelines.
+}
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/StatFunGen/qQTLR}
+  \item Report bugs at \url{https://github.com/StatFunGen/qQTLR/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Anjing Liu \email{anjing.liu@example.com}
+
+Other contributors:
+\itemize{
+  \item Gao Wang [contributor]
+}
+
+}
+\keyword{internal}

From 2c6ec447474157f90f91c6a8f9baf0e5ee5704de Mon Sep 17 00:00:00 2001
From: gw <wangow@gmail.com>
Date: Sat, 18 Apr 2026 07:32:07 -0400
Subject: [PATCH 2/2] Delegate LD pruning / clumping to pecotmr

Removed three local helpers (corr_filter, check_remove_highcorr_snp,
remove_highcorr_snp) and consume pecotmr equivalents instead:
- ld_prune_by_correlation replaces corr_filter
- enforce_design_full_rank replaces check_remove_highcorr_snp (exported,
  imported via @importFrom)
- drop_collinear_columns replaces remove_highcorr_snp (unexported in
  pecotmr; bound via getFromNamespace in zzz.R)

multicontext_ld_clumping rewritten as a thin orchestrator over
pecotmr::ld_clump_by_score: chr/pos parsed once, per-tau clumping via
-log10(p) scoring, final clump over the union scored by MAF. All
bigsnpr/bigstatsr boilerplate (FBM wrapping, single-variant shortcut)
now lives in pecotmr.

Migrated the corresponding testthat coverage to pecotmr. Kept qQTLR
integration tests that exercise enforce_design_full_rank via the
imported symbol and the warning path.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 NAMESPACE                           |   7 +-
 R/qQTLR-package.R                   |   1 +
 R/quantile_twas_weight.R            | 362 +++-------------------------
 R/zzz.R                             |  14 +-
 man/corr_filter.Rd                  |  19 --
 man/multicontext_ld_clumping.Rd     |  23 +-
 tests/testthat/test_quantile_twas.R | 216 +----------------
 7 files changed, 75 insertions(+), 567 deletions(-)
 delete mode 100644 man/corr_filter.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 1fb6b97..8eb76f3 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,25 +9,24 @@ export(perform_qr_analysis)
 export(qr_screen)
 export(quantile_twas_pipeline)
 export(quantile_twas_weight_pipeline)
-importFrom(bigsnpr,snp_clumping)
-importFrom(bigstatsr,FBM.code256)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(parallel,mclapply)
 importFrom(pecotmr,clean_context_names)
+importFrom(pecotmr,enforce_design_full_rank)
 importFrom(pecotmr,filter_variants_by_ld_reference)
 importFrom(pecotmr,find_data)
 importFrom(pecotmr,harmonize_twas)
+importFrom(pecotmr,ld_clump_by_score)
+importFrom(pecotmr,ld_prune_by_correlation)
 importFrom(pecotmr,parse_variant_id)
 importFrom(pecotmr,twas_analysis)
 importFrom(stats,as.dist)
-importFrom(stats,cor)
 importFrom(stats,cov)
 importFrom(stats,cutree)
 importFrom(stats,hclust)
 importFrom(stats,rnorm)
-importFrom(stats,var)
 importFrom(susieR,univariate_regression)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,separate)
diff --git a/R/qQTLR-package.R b/R/qQTLR-package.R
index 2ced6b5..1cc3bf5 100644
--- a/R/qQTLR-package.R
+++ b/R/qQTLR-package.R
@@ -5,6 +5,7 @@
 #'
 #' @importFrom pecotmr parse_variant_id harmonize_twas twas_analysis find_data
 #'   clean_context_names filter_variants_by_ld_reference
+#'   ld_prune_by_correlation ld_clump_by_score enforce_design_full_rank
 #' @importFrom susieR univariate_regression
 #' @keywords internal
 "_PACKAGE"
diff --git a/R/quantile_twas_weight.R b/R/quantile_twas_weight.R
index c33fdfd..8307b84 100644
--- a/R/quantile_twas_weight.R
+++ b/R/quantile_twas_weight.R
@@ -166,121 +166,83 @@ qr_screen <- function(
   ))
 }
 
-#' Perform Clumping and Pruning
-#' @param X Matrix of genotypes
-#' @param qr_results Results from QR_screen
-#' @param maf_list List of minor allele frequencies (optional)
-#' @param ld_clump_r2 R-squared threshold for initial LD clumping based on pvalue
-#' @param final_clump_r2 R-squared threshold for final LD clumping based on MAF
-#' @return A list containing final SNPs and clumped SNPs
-#' @importFrom bigstatsr FBM.code256
-#' @importFrom bigsnpr snp_clumping
+#' Perform Clumping and Pruning Across Contexts (Quantiles)
+#'
+#' Two-stage LD clumping tailored to quantile-regression screens: first, a
+#' per-tau p-value-weighted clump; second, a MAF-weighted (or uninformed)
+#' final clump over the per-tau union. Delegates the underlying bigsnpr
+#' call to \code{pecotmr::ld_clump_by_score}.
+#'
+#' @param X Matrix of genotypes (n x p_sig) restricted to significant SNPs.
+#' @param qr_results Output of \code{qr_screen()}; must contain
+#'   \code{sig.SNPs_names}, \code{tau_list}, \code{quantile_pvalue}, and
+#'   \code{sig_SNP_threshold}.
+#' @param maf_list Optional per-SNP minor allele frequencies aligned to the
+#'   original (pre-screen) SNP ordering, used to score the final clump.
+#' @param ld_clump_r2 r-squared threshold for per-tau clumping. Default 0.2.
+#' @param final_clump_r2 r-squared threshold for the final MAF clump.
+#'   Default 0.8.
+#' @return A list with \code{final_SNPs} and \code{clumped_SNPs}.
 #' @export
 multicontext_ld_clumping <- function(X, qr_results, maf_list = NULL, ld_clump_r2 = 0.2, final_clump_r2 = 0.8) {
-  # Extract significant SNP names
   sig_SNPs_names <- qr_results$sig.SNPs_names
 
-  # If no significant SNPs, return empty result
   if (length(sig_SNPs_names) == 0) {
     return(list(final_SNPs = NULL, clumped_SNPs = NULL, message = "No significant SNPs found"))
   }
 
-  # Check if X only contains one SNP (one column)
   if (ncol(X) == 1) {
-    print("Only one SNP in X. Skipping LD clumping.")
+    message("Only one SNP in X. Skipping LD clumping.")
     final_SNPs <- colnames(X)
-    return(list(final_SNPs = final_SNPs, clumped_SNPs = final_SNPs, message = "Only one SNP, no LD clumping performed"))
+    return(list(final_SNPs = final_SNPs, clumped_SNPs = final_SNPs,
+                message = "Only one SNP, no LD clumping performed"))
   }
 
-  # Extract genotype matrix for significant SNPs
-  G_all <- X # Only retain columns for significant SNPs
-
-  # Convert the genotype matrix into FBM format
-  code_vec <- c(0, 1, 2, rep(NA, 256 - 3))
-  G_all <- FBM.code256(
-    nrow = nrow(G_all),
-    ncol = ncol(G_all),
-    init = G_all,
-    code = code_vec
-  )
-
-  # Parse SNP names to extract chromosome and position information
+  # Parse chr/pos once.
   parsed_snp_info <- do.call(rbind, strsplit(sig_SNPs_names, ":"))
   chr <- as.numeric(sub("^chr", "", parsed_snp_info[, 1]))
-  pos <- as.numeric(parsed_snp_info[, 2]) # Extract position
+  pos <- as.numeric(parsed_snp_info[, 2])
 
-  # Step 1: Perform LD clumping for each tau based on p-values
+  # Stage 1: per-tau clump by -log10(p).
   clumped_snp_list <- list()
-
   for (itau in seq_along(qr_results$tau_list)) {
     tau_name <- paste0("p_qr_", qr_results$tau_list[itau])
-
-    # Extract p-values for the given quantile
     p_values_quantile <- qr_results$quantile_pvalue[, tau_name][qr_results$sig_SNP_threshold]
-    log_p_values <- -log10(p_values_quantile) # Calculate log10 p-values
-
-    # Perform LD clumping using p-values as S
-    ind_clumped <- snp_clumping(
-      G = G_all,
-      infos.chr = chr,
-      infos.pos = pos,
-      S = log_p_values, # Use log10 p-values for each quantile
-      thr.r2 = ld_clump_r2,
-      size = 100 / ld_clump_r2
-    ) # Window size of 500kb
-
-    # Store clumping results for each quantile
-    clumped_snp_list[[tau_name]] <- ind_clumped
+    log_p_values <- -log10(p_values_quantile)
+    clumped_snp_list[[tau_name]] <- ld_clump_by_score(
+      X, score = log_p_values, chr = chr, pos = pos, r2 = ld_clump_r2
+    )
   }
 
-  # Step 2: Take the union of clumping results across all quantiles
-  clumped_snp_union <- unique(unlist(clumped_snp_list)) # This is the SNP index, not the name
+  clumped_snp_union <- unique(unlist(clumped_snp_list))
   clumped_SNPs_name <- sig_SNPs_names[clumped_snp_union]
-  print(paste("Number of SNPs after union of clumping:", length(clumped_snp_union)))
+  message("Number of SNPs after union of clumping: ", length(clumped_snp_union))
 
   if (length(clumped_snp_union) == 1) {
-    message("Only one SNP found in the union. Skipping LD pruning and returning the single SNP directly.")
+    message("Only one SNP found in the union. Skipping final LD clumping.")
     final_SNPs <- sig_SNPs_names[clumped_snp_union]
     return(list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name))
   }
 
-
-  # Step 3: Sort results from union
+  # Stage 2: final clump over the union, ordered by chr/pos, scored by MAF (or NULL).
   sorted_indices <- order(chr[clumped_snp_union], pos[clumped_snp_union])
   chr_sorted <- chr[clumped_snp_union][sorted_indices]
   pos_sorted <- pos[clumped_snp_union][sorted_indices]
 
-  # Step 4: Initialize maf_values to NULL, and update only if maf_list is provided
   maf_values <- NULL
   if (!is.null(maf_list)) {
     maf_values <- maf_list[qr_results$sig_SNP_threshold][clumped_snp_union][sorted_indices]
   }
   G_union <- X[, clumped_snp_union, drop = FALSE][, sorted_indices]
-  if (!inherits(G_union, "FBM")) {
-    G_union <- FBM.code256(
-      nrow = nrow(G_union),
-      ncol = ncol(G_union),
-      init = G_union,
-      code = code_vec
-    )
-  }
-  # Step 5: Perform final clumping using maf_values (if available), otherwise proceed with NULL S
-  final_clumped <- snp_clumping(
-    G = G_union,
-    infos.chr = chr_sorted,
-    infos.pos = pos_sorted,
-    S = maf_values, # Use MAF values if provided, otherwise NULL
-    thr.r2 = final_clump_r2,
-    size = 100 / final_clump_r2
-  ) # Final clumping
-
-  # Restrict the genotype matrix to only the final clumped SNPs
-  G_final_clumped <- G_union[, final_clumped, drop = FALSE] # Limit G to the final clumped SNPs
-  # Get the final SNP names
+
+  final_clumped <- ld_clump_by_score(
+    G_union, score = maf_values, chr = chr_sorted, pos = pos_sorted, r2 = final_clump_r2
+  )
+
   final_SNPs <- sig_SNPs_names[clumped_snp_union][sorted_indices][final_clumped]
-  print(paste("Number of final SNPs after MAF-based clumping (if applied):", length(final_SNPs)))
+  message("Number of final SNPs after MAF-based clumping (if applied): ", length(final_SNPs))
 
-  return(list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name))
+  list(final_SNPs = final_SNPs, clumped_SNPs = clumped_SNPs_name)
 }
 
 #' Perform Quantile Regression Analysis to get beta
@@ -363,250 +325,6 @@ perform_qr_analysis <- function(X, Y, Z = NULL, tau_values = seq(0.05, 0.95, by
   return(result_table_wide)
 }
 
-#' Filter Highly Correlated SNPs
-#' @param X Matrix of genotypes
-#' @param cor_thres Correlation threshold for filtering
-#' @return A list containing filtered X matrix and filter IDs
-corr_filter <- function(X, cor_thres = 0.8) {
-  p <- ncol(X)
-
-  # Use Rfast for faster correlation calculation if available
-  if (requireNamespace("Rfast", quietly = TRUE)) {
-    cor.X <- Rfast::cora(X, large = TRUE)
-  } else {
-    cor.X <- cor(X)
-  }
-  Sigma.distance <- as.dist(1 - abs(cor.X))
-  fit <- hclust(Sigma.distance, method = "single")
-  clusters <- cutree(fit, h = 1 - cor_thres)
-  groups <- unique(clusters)
-  ind.delete <- NULL
-  X.new <- X
-  filter.id <- c(1:p)
-  for (ig in seq_along(groups)) {
-    temp.group <- which(clusters == groups[ig])
-    if (length(temp.group) > 1) {
-      ind.delete <- c(ind.delete, temp.group[-1])
-    }
-  }
-  ind.delete <- unique(ind.delete)
-  if ((length(ind.delete)) > 0) {
-    X.new <- as.matrix(X[, -ind.delete])
-    filter.id <- filter.id[-ind.delete]
-  }
-
-  # Check if X.new has only one column and ensure column names are preserved
-  if (ncol(X.new) == 1) {
-    colnames(X.new) <- colnames(X)[-ind.delete]
-  }
-
-  return(list(X.new = X.new, filter.id = filter.id))
-}
-
-
-
-#' Check and Remove Problematic Columns to Ensure Full Rank
-#'
-#' This function checks for problematic columns in the design matrix that cause it to be not full rank,
-#' and iteratively removes them based on the chosen strategy until the matrix is full rank.
-#'
-#' @param X Matrix of SNPs
-#' @param C Matrix of covariates (unnamed)
-#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
-#' @param response Optional response vector for "response_correlation" strategy
-#' @param max_iterations Maximum number of iterations to attempt removing problematic columns
-#' @return Cleaned matrix X with problematic columns removed
-#' @noRd
-check_remove_highcorr_snp <- function(X = X, C = C, strategy = c("correlation", "variance", "response_correlation"), response = NULL, max_iterations = 300, corr_thresholds = seq(0.75, 0.5, by = -0.05)) {
-  strategy <- match.arg(strategy)
-  original_colnames <- colnames(X)
-  initial_ncol <- ncol(X) # Store the initial number of columns in X
-  iteration <- 0
-  # Combine the design matrix with X (SNPs) and C (covariates), keeping C without column names
-  X_design <- cbind(1, X, C) # Add an intercept column (1)
-  colnames_X_design <- c("Intercept", colnames(X)) # Assign column names only to X (SNPs) part
-
-  # Assign column names only to the X part, leaving C without names
-  colnames(X_design)[1:(length(colnames_X_design))] <- colnames_X_design
-
-  # Check the initial rank of the design matrix
-  matrix_rank <- qr(X_design)$rank
-  message("Initial rank of the design matrix: ", matrix_rank, " / ", ncol(X_design), " columns.")
-
-  # Skip remove_highcorr_snp if removing all problematic columns doesn't achieve full rank
-  skip_remove_highcorr <- FALSE
-
-  # First check: Try removing all problematic columns at once
-  if (matrix_rank < ncol(X_design)) {
-    message("Design matrix is not full rank, identifying all problematic columns...")
-
-    # QR decomposition to identify linearly dependent columns
-    qr_decomp <- qr(X_design)
-    R <- qr_decomp$rank
-    Q <- qr_decomp$pivot
-
-    # Get all problematic columns
-    problematic_cols <- Q[(R + 1):ncol(X_design)]
-    problematic_colnames <- colnames(X_design)[problematic_cols]
-    problematic_colnames <- problematic_colnames[problematic_colnames %in% colnames(X)]
-
-    if (length(problematic_colnames) > 0) {
-      message("Attempting to remove all problematic columns at once: ", paste(problematic_colnames, collapse = ", "))
-
-      # Remove all problematic columns at once
-      X_temp <- X[, !(colnames(X) %in% problematic_colnames), drop = FALSE]
-      X_design_temp <- cbind(1, X_temp, C)
-      colnames_X_design_temp <- c("Intercept", colnames(X_temp))
-      colnames(X_design_temp)[1:length(colnames_X_design_temp)] <- colnames_X_design_temp
-
-      # Check if removing all problematic columns achieves full rank
-      matrix_rank_temp <- qr(X_design_temp)$rank
-
-      if (matrix_rank_temp == ncol(X_design_temp)) {
-        message("Achieved full rank by removing all problematic columns at once. Proceeding with original logic...")
-      } else {
-        message("Removing all problematic columns did not achieve full rank. Skipping to corr_filter...")
-        skip_remove_highcorr <- TRUE
-      }
-    }
-  }
-
-  # Only proceed with remove_highcorr_snp if not skipping
-  if (!skip_remove_highcorr) {
-    while (matrix_rank < ncol(X_design) && iteration < max_iterations) {
-      message("Design matrix is not full rank, identifying problematic columns...")
-
-      qr_decomp <- qr(X_design)
-      R <- qr_decomp$rank
-      Q <- qr_decomp$pivot
-      problematic_cols <- Q[(R + 1):ncol(X_design)]
-      problematic_colnames <- colnames(X_design)[problematic_cols]
-      problematic_colnames <- problematic_colnames[problematic_colnames %in% colnames(X)]
-
-      if (length(problematic_colnames) == 0) {
-        message("No more problematic SNP columns found in X. Breaking the loop.")
-        break
-      }
-
-      message("Problematic SNP columns identified: ", paste(problematic_colnames, collapse = ", "))
-      X <- remove_highcorr_snp(X, problematic_colnames, strategy = strategy, response = response)
-
-      X_design <- cbind(1, X, C)
-      colnames_X_design <- c("Intercept", colnames(X))
-      colnames(X_design)[1:length(colnames_X_design)] <- colnames_X_design
-
-      matrix_rank <- qr(X_design)$rank
-      message("New rank of the design matrix: ", matrix_rank, " / ", ncol(X_design), " columns.")
-      iteration <- iteration + 1
-    }
-
-    if (iteration == max_iterations) {
-      warning("Maximum iterations reached. The design matrix may still not be full rank.")
-    }
-  }
-
-  # Final check and corr_filter if needed
-  matrix_rank <- qr(cbind(1, X, C))$rank
-  if (matrix_rank < ncol(cbind(1, X, C))) {
-    message("Applying corr_filter to ensure the design matrix is full rank...")
-    for (threshold in corr_thresholds) {
-      filter_result <- corr_filter(X, cor_thres = threshold)
-      X <- filter_result$X.new
-      X_design <- cbind(1, X, C)
-      colnames_X_design <- c("Intercept", colnames(X))
-      colnames(X_design)[1:length(colnames_X_design)] <- colnames_X_design
-
-      matrix_rank <- qr(X_design)$rank
-      message("Rank after corr_filter with threshold ", threshold, ": ", matrix_rank, " / ", ncol(X_design), " columns.")
-      if (matrix_rank == ncol(X_design)) {
-        break
-      }
-    }
-  }
-
-
-  if (iteration == max_iterations) {
-    warning("Maximum iterations reached. The design matrix may still not be full rank.")
-  }
-
-  if (ncol(X) == 1 && initial_ncol == 1) {
-    colnames(X) <- original_colnames
-  }
-  return(X)
-}
-
-#' Remove Problematic Columns Based on a Given Strategy
-#'
-#' This function removes problematic columns from a matrix based on different strategies, such as smallest variance,
-#' highest correlation, or lowest correlation with the response variable.
-#'
-#' @param X Matrix of SNPs
-#' @param problematic_cols A vector of problematic columns to be removed
-#' @param strategy The strategy for removing problematic columns ("variance", "correlation", or "response_correlation")
-#' @param response Optional response vector for "response_correlation" strategy
-#' @return Cleaned matrix X with the selected column removed
-#' @importFrom stats var cor
-#' @noRd
-remove_highcorr_snp <- function(X, problematic_cols, strategy = c("correlation", "variance", "response_correlation"), response = NULL) {
-  # Set default strategy
-  strategy <- match.arg(strategy)
-
-  message("Identified problematic columns: ", paste(problematic_cols, collapse = ", "))
-
-  if (length(problematic_cols) == 0) {
-    return(X) # If there are no problematic columns, return as is
-  }
-
-  if (length(problematic_cols) == 1) {
-    message("Only one problematic column: ", problematic_cols)
-    col_to_remove <- problematic_cols[1]
-    message("Removing column: ", col_to_remove)
-    X <- X[, !(colnames(X) %in% col_to_remove), drop = FALSE]
-    # If X only has one column left after removal, ensure its column name is preserved
-    if (ncol(X) == 1) {
-      colnames(X) <- colnames(X)[colnames(X) != col_to_remove] # Preserve remaining SNP name
-    }
-    return(X)
-  }
-
-  # Choose columns to remove based on the strategy
-  if (strategy == "variance") {
-    # Strategy 1: Remove the column with the smallest variance
-    variances <- apply(X[, problematic_cols, drop = FALSE], 2, var)
-    col_to_remove <- problematic_cols[which.min(variances)]
-    message("Removing column with the smallest variance: ", col_to_remove)
-  } else if (strategy == "correlation") {
-    # Strategy 2: Remove the column with the highest sum of absolute correlations
-    cor_matrix <- abs(cor(X[, problematic_cols, drop = FALSE])) # Calculate absolute correlation matrix
-    diag(cor_matrix) <- 0 # Ignore the diagonal (self-correlation)
-
-    if (length(problematic_cols) == 2) {
-      # If there are only two problematic columns, randomly remove one
-      col_to_remove <- sample(problematic_cols, 1)
-      message("Only two problematic columns, randomly removing: ", col_to_remove)
-    } else {
-      # Calculate sum of absolute correlations for each column
-      cor_sums <- colSums(cor_matrix)
-      col_to_remove <- problematic_cols[which.max(cor_sums)] # Remove the column with the largest sum of correlations
-      message("Removing column with highest sum of absolute correlations: ", col_to_remove)
-    }
-  } else if (strategy == "response_correlation" && !is.null(response)) {
-    # Strategy 3: Remove the column with the lowest correlation with the response variable
-    # FIXME: This strategy is potentially biased based on corr of response and variants
-    cor_with_response <- apply(X[, problematic_cols, drop = FALSE], 2, function(col) cor(col, response))
-    col_to_remove <- problematic_cols[which.min(abs(cor_with_response))]
-    message("Removing column with lowest correlation with the response: ", col_to_remove)
-  } else {
-    stop("Invalid strategy or missing response variable for 'response_correlation' strategy.")
-  }
-
-  # Remove the selected column from X
-  X <- X[, !(colnames(X) %in% col_to_remove), drop = FALSE]
-  if (ncol(X) == 1) {
-    colnames(X) <- colnames(X)[colnames(X) != col_to_remove] # Preserve remaining SNP name
-  }
-  return(X)
-}
 
 #' Calculate QR Coefficients and Pseudo R-squared Across Multiple Quantiles
 #'
@@ -626,7 +344,7 @@ calculate_qr_and_pseudo_R2 <- function(AssocData, tau.list, strategy = c("correl
   }
   strategy <- match.arg(strategy)
   # Check and handle problematic columns affecting the full rank of the design matrix
-  AssocData$X.filter <- check_remove_highcorr_snp(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y, corr_thresholds = corr_thresholds)
+  AssocData$X.filter <- enforce_design_full_rank(X = AssocData$X.filter, C = AssocData$C, strategy = strategy, response = AssocData$Y, corr_thresholds = corr_thresholds)
   snp_names <- colnames(AssocData$X.filter)
   # Build the cleaned design matrix using the filtered X and unnamed C
 
@@ -1030,7 +748,7 @@ quantile_twas_weight_pipeline <- function(X, Y, Z = NULL, maf = NULL, region_id
       # Step 8: Initial filter of highly correlated SNPs
       message("Filtering highly correlated SNPs...")
       if (ncol(X_for_corr) > 1) {
-        filtered <- corr_filter(X_for_corr, initial_corr_filter_cutoff)
+        filtered <- ld_prune_by_correlation(X_for_corr, initial_corr_filter_cutoff)
         X.filter <- filtered$X.new
       } else {
         X.filter <- X_for_corr
diff --git a/R/zzz.R b/R/zzz.R
index e1521d9..32d4097 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -3,15 +3,13 @@
 # bare names (no `pecotmr:::` prefix) without triggering the R CMD check
 # warning "':::' calls which should be '::'".
 
-compute_qvalues <- NULL
-pval_cauchy     <- NULL
+compute_qvalues        <- NULL
+pval_cauchy            <- NULL
+drop_collinear_columns <- NULL
 
 .onLoad <- function(libname, pkgname) {
   ns_self <- asNamespace(pkgname)
-  assign("compute_qvalues",
-         utils::getFromNamespace("compute_qvalues", "pecotmr"),
-         envir = ns_self)
-  assign("pval_cauchy",
-         utils::getFromNamespace("pval_cauchy", "pecotmr"),
-         envir = ns_self)
+  for (sym in c("compute_qvalues", "pval_cauchy", "drop_collinear_columns")) {
+    assign(sym, utils::getFromNamespace(sym, "pecotmr"), envir = ns_self)
+  }
 }
diff --git a/man/corr_filter.Rd b/man/corr_filter.Rd
deleted file mode 100644
index d2f8307..0000000
--- a/man/corr_filter.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/quantile_twas_weight.R
-\name{corr_filter}
-\alias{corr_filter}
-\title{Filter Highly Correlated SNPs}
-\usage{
-corr_filter(X, cor_thres = 0.8)
-}
-\arguments{
-\item{X}{Matrix of genotypes}
-
-\item{cor_thres}{Correlation threshold for filtering}
-}
-\value{
-A list containing filtered X matrix and filter IDs
-}
-\description{
-Filter Highly Correlated SNPs
-}
diff --git a/man/multicontext_ld_clumping.Rd b/man/multicontext_ld_clumping.Rd
index 6d0cbf0..2cd3595 100644
--- a/man/multicontext_ld_clumping.Rd
+++ b/man/multicontext_ld_clumping.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/quantile_twas_weight.R
 \name{multicontext_ld_clumping}
 \alias{multicontext_ld_clumping}
-\title{Perform Clumping and Pruning}
+\title{Perform Clumping and Pruning Across Contexts (Quantiles)}
 \usage{
 multicontext_ld_clumping(
   X,
@@ -13,19 +13,26 @@ multicontext_ld_clumping(
 )
 }
 \arguments{
-\item{X}{Matrix of genotypes}
+\item{X}{Matrix of genotypes (n x p_sig) restricted to significant SNPs.}
 
-\item{qr_results}{Results from QR_screen}
+\item{qr_results}{Output of \code{qr_screen()}; must contain
+\code{sig.SNPs_names}, \code{tau_list}, \code{quantile_pvalue}, and
+\code{sig_SNP_threshold}.}
 
-\item{maf_list}{List of minor allele frequencies (optional)}
+\item{maf_list}{Optional per-SNP minor allele frequencies aligned to the
+original (pre-screen) SNP ordering, used to score the final clump.}
 
-\item{ld_clump_r2}{R-squared threshold for initial LD clumping based on pvalue}
+\item{ld_clump_r2}{r-squared threshold for per-tau clumping. Default 0.2.}
 
-\item{final_clump_r2}{R-squared threshold for final LD clumping based on MAF}
+\item{final_clump_r2}{r-squared threshold for the final MAF clump.
+Default 0.8.}
 }
 \value{
-A list containing final SNPs and clumped SNPs
+A list with \code{final_SNPs} and \code{clumped_SNPs}.
 }
 \description{
-Perform Clumping and Pruning
+Two-stage LD clumping tailored to quantile-regression screens: first, a
+per-tau p-value-weighted clump; second, a MAF-weighted (or uninformed)
+final clump over the per-tau union. Delegates the underlying bigsnpr
+call to \code{pecotmr::ld_clump_by_score}.
 }
diff --git a/tests/testthat/test_quantile_twas.R b/tests/testthat/test_quantile_twas.R
index 1d4c4cf..18438d9 100644
--- a/tests/testthat/test_quantile_twas.R
+++ b/tests/testthat/test_quantile_twas.R
@@ -182,196 +182,6 @@ test_that("perform_grouped_integration single variant skips clustering", {
   expect_equal(ncol(result$weights), 3)
 })
 
-# ===========================================================================
-# corr_filter (internal)
-# ===========================================================================
-
-test_that("corr_filter removes highly correlated columns", {
-  set.seed(42)
-  n <- 50; p <- 10
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- paste0("v", 1:p)
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.01)
-  result <- qQTLR:::corr_filter(X, cor_thres = 0.9)
-  expect_true(ncol(result$X.new) < p)
-  expect_true(length(result$filter.id) == ncol(result$X.new))
-})
-
-test_that("corr_filter keeps all columns when uncorrelated", {
-  set.seed(42)
-  n <- 100; p <- 5
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- paste0("v", 1:p)
-  result <- qQTLR:::corr_filter(X, cor_thres = 0.99)
-  expect_equal(ncol(result$X.new), p)
-  expect_equal(result$filter.id, 1:p)
-})
-
-test_that("corr_filter preserves colnames for single remaining column", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n)
-  colnames(X) <- c("a", "b", "c")
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.001)
-  X[, 3] <- X[, 1] + rnorm(n, sd = 0.001)
-  result <- qQTLR:::corr_filter(X, cor_thres = 0.5)
-  expect_true(ncol(result$X.new) >= 1)
-  expect_true(!is.null(colnames(result$X.new)))
-})
-
-test_that("corr_filter handles single-column input", {
-  set.seed(42)
-  n <- 30
-  X <- matrix(rnorm(n), nrow = n, ncol = 1)
-  colnames(X) <- "v1"
-  expect_error(qQTLR:::corr_filter(X, cor_thres = 0.8))
-})
-
-test_that("corr_filter with low threshold removes more columns", {
-  set.seed(42)
-  n <- 100; p <- 5
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- paste0("v", 1:p)
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.1)
-  X[, 3] <- X[, 1] + rnorm(n, sd = 0.1)
-  X[, 5] <- X[, 4] + rnorm(n, sd = 0.1)
-  result_strict <- qQTLR:::corr_filter(X, cor_thres = 0.3)
-  result_lenient <- qQTLR:::corr_filter(X, cor_thres = 0.99)
-  expect_true(ncol(result_strict$X.new) <= ncol(result_lenient$X.new))
-})
-
-test_that("corr_filter preserves colnames when no columns deleted", {
-  set.seed(42)
-  n <- 100; p <- 3
-  X <- matrix(rnorm(n * p), nrow = n)
-  colnames(X) <- c("snp_a", "snp_b", "snp_c")
-  result <- qQTLR:::corr_filter(X, cor_thres = 0.999)
-  expect_equal(colnames(result$X.new), colnames(X))
-})
-
-# ===========================================================================
-# remove_highcorr_snp (internal)
-# ===========================================================================
-
-test_that("remove_highcorr_snp returns X unchanged when no problematic columns", {
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = character(0), strategy = "correlation")
-  expect_equal(ncol(result), 5)
-})
-
-test_that("remove_highcorr_snp removes single problematic column", {
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = "v3", strategy = "correlation")
-  expect_equal(ncol(result), 4)
-  expect_false("v3" %in% colnames(result))
-})
-
-test_that("remove_highcorr_snp variance strategy removes lowest variance column", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n, ncol = 3)
-  colnames(X) <- c("low_var", "mid_var", "high_var")
-  X[, 1] <- X[, 1] * 0.01
-  X[, 3] <- X[, 3] * 10
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("low_var", "mid_var", "high_var"),
-                                           strategy = "variance")
-  expect_equal(ncol(result), 2)
-  expect_false("low_var" %in% colnames(result))
-})
-
-test_that("remove_highcorr_snp correlation strategy with two columns removes one randomly", {
-  set.seed(42)
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2"), strategy = "correlation")
-  expect_equal(ncol(result), 4)
-  expect_true(xor("v1" %in% colnames(result), "v2" %in% colnames(result)) ||
-              (!("v1" %in% colnames(result)) && !("v2" %in% colnames(result))) == FALSE)
-})
-
-test_that("remove_highcorr_snp correlation strategy with 3+ cols removes highest sum", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 4), nrow = n, ncol = 4)
-  colnames(X) <- paste0("v", 1:4)
-  X[, 2] <- X[, 1] + rnorm(n, sd = 0.01)
-  X[, 3] <- X[, 1] + rnorm(n, sd = 0.01)
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2", "v3"),
-                                           strategy = "correlation")
-  expect_equal(ncol(result), 3)
-})
-
-test_that("remove_highcorr_snp response_correlation strategy removes lowest response corr", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n, ncol = 3)
-  colnames(X) <- c("v1", "v2", "v3")
-  response <- X[, 1] * 2 + rnorm(n, sd = 0.1)
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2", "v3"),
-                                           strategy = "response_correlation",
-                                           response = response)
-  expect_equal(ncol(result), 2)
-  expect_true("v1" %in% colnames(result))
-})
-
-test_that("remove_highcorr_snp errors on invalid strategy", {
-  X <- matrix(rnorm(100), nrow = 20, ncol = 5)
-  colnames(X) <- paste0("v", 1:5)
-  expect_error(
-    qQTLR:::remove_highcorr_snp(X, problematic_cols = c("v1", "v2"),
-                                   strategy = "invalid_strategy"),
-    "arg"
-  )
-})
-
-test_that("remove_highcorr_snp preserves column name when single column remains", {
-  set.seed(42)
-  X <- matrix(rnorm(40), nrow = 20, ncol = 2)
-  colnames(X) <- c("keeper", "removed")
-  result <- qQTLR:::remove_highcorr_snp(X, problematic_cols = "removed", strategy = "correlation")
-  expect_equal(ncol(result), 1)
-  expect_equal(colnames(result), "keeper")
-})
-
-# ===========================================================================
-# check_remove_highcorr_snp (internal)
-# ===========================================================================
-
-test_that("check_remove_highcorr_snp returns full-rank matrix when already full rank", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 3), nrow = n, ncol = 3)
-  colnames(X) <- paste0("v", 1:3)
-  C <- matrix(rnorm(n * 2), nrow = n, ncol = 2)
-  result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
-  expect_true(is.matrix(result))
-  expect_true(ncol(result) >= 1)
-})
-
-test_that("check_remove_highcorr_snp handles rank-deficient design via corr_filter fallback", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n * 4), nrow = n, ncol = 4)
-  colnames(X) <- paste0("v", 1:4)
-  X[, 4] <- X[, 1] + X[, 2]
-  C <- NULL
-  result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
-  design <- cbind(1, result)
-  expect_equal(qr(design)$rank, ncol(design))
-})
-
-test_that("check_remove_highcorr_snp preserves colname for single-column input", {
-  set.seed(42)
-  n <- 50
-  X <- matrix(rnorm(n), nrow = n, ncol = 1)
-  colnames(X) <- "only_snp"
-  C <- NULL
-  result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
-  expect_equal(colnames(result), "only_snp")
-})
-
 # ===========================================================================
 # calculate_coef_heterogeneity (internal)
 # ===========================================================================
@@ -798,14 +608,14 @@ test_that("get_modularity handles all-negative weights", {
 })
 
 # ===========================================================================
-# check_remove_highcorr_snp corr_filter fallback (internal)
+# enforce_design_full_rank integration (imported from pecotmr)
 # ===========================================================================
 
-test_that("check_remove_highcorr_snp triggers corr_filter fallback for stubborn rank deficiency", {
+test_that("enforce_design_full_rank triggers correlation-prune fallback for stubborn rank deficiency", {
   set.seed(42)
   n <- 50
-  # Create a matrix where columns are highly correlated but not identical
-  # so QR removal won't fully fix the rank deficiency
+  # Columns highly correlated but not identical so QR removal won't
+  # fully fix the rank deficiency on its own.
   base <- rnorm(n)
   X <- cbind(
     base + rnorm(n, sd = 0.001),
@@ -814,28 +624,22 @@ test_that("check_remove_highcorr_snp triggers corr_filter fallback for stubborn
     rnorm(n)
   )
   colnames(X) <- paste0("v", 1:4)
-  # Covariate that is also correlated with a column
   C <- matrix(X[, 4] + rnorm(n, sd = 0.001), ncol = 1)
-  result <- qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation")
+  result <- enforce_design_full_rank(X = X, C = C, strategy = "correlation")
   design <- cbind(1, result, C)
   expect_equal(qr(design)$rank, ncol(design))
 })
 
-test_that("check_remove_highcorr_snp max_iterations warning path", {
+test_that("enforce_design_full_rank max_iterations warning path", {
   set.seed(42)
   n <- 50
-  # Create a rank-deficient matrix so the while loop enters
-  # but max_iterations = 1 limits iterations
   X <- matrix(rnorm(n * 4), nrow = n, ncol = 4)
   colnames(X) <- paste0("v", 1:4)
-  X[, 3] <- X[, 1] + X[, 2]  # linearly dependent
-  X[, 4] <- X[, 1] - X[, 2]  # another dependency
-  C <- NULL
-  # With max_iterations = 1, it may not fully resolve rank deficiency,
-  # triggering the corr_filter fallback
+  X[, 3] <- X[, 1] + X[, 2]
+  X[, 4] <- X[, 1] - X[, 2]
   expect_warning(
-    qQTLR:::check_remove_highcorr_snp(X = X, C = C, strategy = "correlation", max_iterations = 1),
-    "Maximum iterations reached"
+    enforce_design_full_rank(X = X, C = NULL, strategy = "correlation", max_iterations = 1),
+    "max_iterations reached"
   )
 })