KarchinLab · KevinMLanderos · Apr 26, 2026 · Apr 26, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/README.md b/README.md
@@ -38,3 +38,58 @@ nextflow run KarchinLab/TCRtoolkit \
     --input_format adaptive \
     --max_memory 10GB --max_cpus 4
 ```
+## Input Formats
+
+`TCRtoolkit` accepts three input formats, specified via `--input_format`:
+
+| Format | Description |
+|---|---|
+| `adaptive` | Adaptive Biotechnologies output files |
+| `cellranger` | 10x Genomics Cell Ranger `airr_rearrangement.tsv` output files (single-cell pseudo-bulk) |
+| `airr` | AIRR-compliant tab-separated files |
+
+## Workflow Levels
+
+The pipeline supports multiple levels of analysis, controlled by `--workflow_level`:
+
+| Level | Description |
+|---|---|
+| `sample` | Per-sample QC and repertoire statistics |
+| `patient` | Patient-level clonotype aggregation and comparison |
+| `compare` | Cross-cohort repertoire comparison and overlap |
+
+Levels can be combined: `--workflow_level sample,patient,compare`
+
+## HTML Reports
+
+After the pipeline finishes, `TCRtoolkit` generates interactive HTML reports using [Quarto](https://quarto.org/). Four main report notebooks are rendered automatically:
+
+| Notebook | Description |
+|---|---|
+| `template_qc.qmd` | Quality control metrics and filtering summary |
+| `template_discovery_brief.qmd` | Brief overview of the most relevant repertoire discovery results |
+| `template_details_part1.qmd` | Detailed repertoire analysis, part 1 |
+| `template_details_part2.qmd` | Detailed repertoire analysis, part 2 |
+
+### Conditional Report Sections
+
+Certain sub-reports are automatically appended based on input and workflow options:
+
+- `--input_format cellranger` → includes single-cell phenotype report
+- `--input_format adaptive` or `airr` → includes bulk phenotype report
+- `--workflow_level` includes `patient` → includes patient-level clonotype analysis
+- `--use_gliph2` → additionally includes GLIPH2 clustering report
+
+## Key Parameters
+
+| Parameter | Default | Description |
+|---|---|---|
+| `--samplesheet` | — | Path or URL to sample sheet CSV |
+| `--outdir` | `out` | Output directory |
+| `--input_format` | `airr` | Input format: `airr`, `adaptive`, or `cellranger` |
+| `--workflow_level` | `sample,compare` | Comma-separated analysis level(s): `sample`, `patient`, `compare` |
+| `--use_gliph2` | `false` | Enable GLIPH2 CDR3 motif clustering |
+| `--sobject_gex` | — | Path to TSV file containing cell-barcode phenotypes for pseudo-bulk phenotyping |
+| `--max_memory` | `768.GB` | Maximum memory allocation |
+| `--max_cpus` | `192` | Maximum CPU allocation |
+
diff --git a/env.yml b/env.yml
@@ -9,7 +9,7 @@ dependencies:
   - numpy=1.25.2
   - scipy=1.11.3
   - seaborn=0.13.0
-  - dash=2.14.1
+  - dash>=2.15.0
   - matplotlib=3.8.1
   - pip=23.2.1
   - jupyterlab=4.0.8
@@ -26,6 +26,7 @@ dependencies:
   - rpy2=3.6.4
   - unzip
   - openjdk=8
+  - upsetplot=0.9.0
 
   # R and R packages
   - r-base=4.4.2

diff --git a/nextflow.config b/nextflow.config
@@ -34,11 +34,11 @@ params {
     sample_stats_template = "${projectDir}/notebooks/sample_stats_template.qmd"
     compare_stats_template = "${projectDir}/notebooks/compare_stats_template.qmd"
 
-    // Sample stats metadata parameters
-    samplechart_x_col = 'timepoint'
-    samplechart_color_col = 'origin'
-    vgene_subject_col = 'subject_id'
-    vgene_x_cols = 'origin,timepoint'
+    // Notebook parameters (metadata column names)
+    timepoint_col = 'timepoint'
+    timepoint_order_col = 'timepoint_order'
+    alias_col = 'alias'
+    subject_col = 'subject_id'
 
-
+
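+    // Legacy sample report params, kept as aliases of the notebook params above.
+    // NOTE(review): the color column now resolves to 'alias' (previously 'origin') and
+    // vgene_x_cols is now a list (previously the string 'origin,timepoint') — confirm consumers.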
+    samplechart_x_col = timepoint_col
+    samplechart_color_col = alias_col
+    vgene_subject_col = subject_col
+    vgene_x_cols = [timepoint_col, alias_col]
-
+
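+    // Legacy sample report params, kept as aliases of the notebook params above.
+    // NOTE(review): the color column now resolves to 'alias' (previously 'origin') and
+    // vgene_x_cols is now a list (previously the string 'origin,timepoint') — confirm consumers.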
+    samplechart_x_col = timepoint_col
+    samplechart_color_col = alias_col
+    vgene_subject_col = subject_col
+    vgene_x_cols = [timepoint_col, alias_col]
     // OLGA parameters
     olga_chunk_length = 100000 // larger chunk size = less parallelization

diff --git a/notebooks/compare_stats_template.qmd b/notebooks/compare_stats_template.qmd
diff --git a/notebooks/gliph2_report_template.qmd b/notebooks/gliph2_report_template.qmd