diff --git a/.github/recipe/recipe.yaml b/.github/recipe/recipe.yaml index a8095d4f..7a678ef8 100644 --- a/.github/recipe/recipe.yaml +++ b/.github/recipe/recipe.yaml @@ -23,7 +23,6 @@ requirements: - ${{ stdlib('c') }} - ${{ compiler('cxx') }} host: - - bioconductor-biostrings - bioconductor-iranges - bioconductor-qvalue - bioconductor-s4vectors diff --git a/DESCRIPTION b/DESCRIPTION index 04200fa5..3b367eb5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,6 @@ Authors@R: c(person("Gao Wang",role = c("cre","aut"), person("StatFunGen Lab", role = "ctb")) License: MIT + file LICENSE Imports: - Biostrings, GenomicRanges, IRanges, MungeSumstats, diff --git a/R/allele_qc.R b/R/allele_qc.R index 40949f33..ba036239 100644 --- a/R/allele_qc.R +++ b/R/allele_qc.R @@ -28,7 +28,8 @@ match_ref_panel <- function(target_data, ref_variants, col_to_flip = NULL, remove_indels = FALSE, remove_strand_ambiguous = TRUE, flip_strand = FALSE, remove_unmatched = TRUE, ...) { strand_flip <- function(ref) { - as.character(Biostrings::reverseComplement(Biostrings::DNAStringSet(ref))) + # Reverse AND complement each allele (upper-cased first): chartr() alone only complements, which is wrong for multi-base (indel) alleles + vapply(chartr("ATCG", "TAGC", toupper(ref)), function(allele) intToUtf8(rev(utf8ToInt(allele))), character(1), USE.NAMES = FALSE) } # helper to sanitize column names to avoid NA/empty names that break dplyr verbs diff --git a/pixi.toml b/pixi.toml index 7f054c68..c1b1961a 100644 --- a/pixi.toml +++ b/pixi.toml @@ -41,7 +41,6 @@ r45 = {features = ["r45"]} "r-rcmdcheck" = "*" "r-testthat" = "*" "r-tidyverse" = "*" -"bioconductor-biostrings" = "*" "bioconductor-iranges" = "*" "bioconductor-qvalue" = "*" "bioconductor-s4vectors" = "*" diff --git a/vignettes/dentist.Rmd b/vignettes/dentist.Rmd index 23922c9c..c56436c4 100644 --- a/vignettes/dentist.Rmd +++ b/vignettes/dentist.Rmd @@ -66,6 +66,9 @@ Here we demonstrate count mode on the full dataset, which avoids the "<2000 vari First, we load the genotype data and align alleles between the summary statistics and the reference panel. 
```{r prepare-data} +# Compute z-scores from beta/se +sumstat$z <- sumstat$beta / sumstat$se + # Load reference panel variant info bim <- as.data.frame(vroom(paste0(bfile, ".bim"), col_names = c("chrom", "variant_id", "gd", "pos", "A1", "A2"), @@ -99,7 +102,7 @@ ref_df <- data.frame( qc_result <- allele_qc( target_data = target_df, ref_variants = ref_df, col_to_flip = "z", match_min_prop = 0, - remove_dups = TRUE, remove_indels = TRUE, remove_strand_ambiguous = TRUE + remove_dups = TRUE, remove_indels = TRUE, remove_strand_ambiguous = TRUE ) aligned <- qc_result$target_data_qced aligned <- aligned[order(aligned$pos), ]