StatFunGen · danielnachun · Apr 22, 2026 · Apr 22, 2026
diff --git a/.github/recipe/recipe.yaml b/.github/recipe/recipe.yaml
@@ -23,7 +23,6 @@ requirements:
     - ${{ stdlib('c') }}
     - ${{ compiler('cxx') }}
   host:
-    - bioconductor-biostrings
     - bioconductor-iranges
     - bioconductor-qvalue
     - bioconductor-s4vectors

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,7 +14,6 @@ Authors@R: c(person("Gao Wang",role = c("cre","aut"),
                   person("StatFunGen Lab", role = "ctb"))
 License: MIT + file LICENSE
 Imports:
-    Biostrings,
     GenomicRanges,
     IRanges,
     MungeSumstats,

diff --git a/R/allele_qc.R b/R/allele_qc.R
@@ -28,7 +28,12 @@ match_ref_panel <- function(target_data, ref_variants, col_to_flip = NULL,
                      remove_indels = FALSE, remove_strand_ambiguous = TRUE,
                      flip_strand = FALSE, remove_unmatched = TRUE, ...) {
 	strand_flip <- function(ref) {
-	  as.character(Biostrings::reverseComplement(Biostrings::DNAStringSet(ref)))
+	  # Reverse-complement each allele using base R only (no Biostrings).
+	  # Complementing alone is equivalent only for single-base alleles; multi-base
+	  # alleles must also be reversed, as the replaced reverseComplement() call did.
+	  vapply(chartr("ATCG", "TAGC", ref), function(allele) {
+	    intToUtf8(rev(utf8ToInt(allele)))
+	  }, character(1), USE.NAMES = FALSE)
 	}
 
   # helper to sanitize column names to avoid NA/empty names that break dplyr verbs

diff --git a/pixi.toml b/pixi.toml
@@ -41,7 +41,6 @@ r45 = {features = ["r45"]}
 "r-rcmdcheck" = "*"
 "r-testthat" = "*"
 "r-tidyverse" = "*"
-"bioconductor-biostrings" = "*"
 "bioconductor-iranges" = "*"
 "bioconductor-qvalue" = "*"
 "bioconductor-s4vectors" = "*"

diff --git a/vignettes/dentist.Rmd b/vignettes/dentist.Rmd
@@ -66,6 +66,9 @@ Here we demonstrate count mode on the full dataset, which avoids the "<2000 vari
 First, we load the genotype data and align alleles between the summary statistics and the reference panel.
 
 ```{r prepare-data}
+# Compute z-scores from beta/se
+sumstat$z <- sumstat$beta / sumstat$se
+
 # Load reference panel variant info
 bim <- as.data.frame(vroom(paste0(bfile, ".bim"),
                            col_names = c("chrom", "variant_id", "gd", "pos", "A1", "A2"),
@@ -99,7 +102,7 @@ ref_df <- data.frame(
 qc_result <- allele_qc(
   target_data = target_df, ref_variants = ref_df,
   col_to_flip = "z", match_min_prop = 0,
-  remove_dups = TRUE, remove_strand_ambiguous = TRUE
+  remove_dups = TRUE, remove_indels = TRUE, remove_strand_ambiguous = TRUE
 )
 aligned <- qc_result$target_data_qced
 aligned <- aligned[order(aligned$pos), ]