25 changes: 23 additions & 2 deletions .gitignore
@@ -36,11 +36,32 @@ tmp
# specific files
output*
sample_dataset_30.txt
sample_dataset_highres_*.txt
uv.lock
watch_run.sh
wandb/
HEST/
eva-probe/
eval/
eval_results/
eval_results_summary.csv
CLAUDE.md
AGENTS.md

# Cluster launch scripts (cluster-specific paths + embedded credentials)
run_*.sbatch

# Random local artifacts
.claude/
*.pdf
image*.png
image\ copy*.png
wsi_*.png
*.docx
# Local-only docs (technical report, generated artifacts). Ignore docs/ contents
# via docs/* with the .docx re-include after it: git cannot re-include files
# inside a fully ignored directory, and the last matching pattern wins.
docs/*
!docs/*.docx

# Local-only handoff and tracker docs (kept on disk, out of git)
HIGHRES_FINETUNING_GUIDE.md
HIGHRES_EXPERIMENTS.md
60 changes: 60 additions & 0 deletions dinov2/configs/train/vitg14_reg4_highres.yaml
@@ -0,0 +1,60 @@
dino:
head_n_prototypes: 131072
head_bottleneck_dim: 384
do_kde: True
kde_loss_weight: .05
koleo_loss_weight: 0
do_koleo: False
ibot:
loss_weight: 1.0
mask_sample_probability: 0.5
mask_ratio_min_max:
- 0.1
- 0.45
separate_head: true
head_n_prototypes: 131072
train:
sample_list_path: sample_dataset_highres_25M.txt # magnification-aware: [1, 0.5, 0.25, 0.125] µm/px at 448px
streaming_from_hf: false
streaming_dataset_path: medarc/TCGA-12K-parquet
batch_size_per_gpu: 6
centering: sinkhorn_knopp
use_pretrained: False
teacher_checkpoint_path: /data/ratna/retrain/eval/averaged_87500_to_137500/teacher_checkpoint.pth
OFFICIAL_EPOCH_LENGTH: 1250
num_workers: 24
prefetch_factor: 8
skip_checkpointer: false
gradient_accumulation_steps: 4
patch_size_pixels: 448
student:
arch: vit_giant2
patch_size: 14
drop_path_rate: 0.4
ffn_layer: swiglufused
block_chunks: 4
num_register_tokens: 4
teacher:
momentum_teacher: 0.994
optim:
epochs: 96 # 120k iterations / 1250 steps_per_epoch = 96 epochs
early_stop: 96
weight_decay_end: 0.2
base_lr: 1.0e-04
warmup_epochs: 2
layerwise_decay: 1.0
crops:
global_crops_scale:
- 0.32
- 1.0
local_crops_number: 8
local_crops_scale:
- 0.05
- 0.32
global_crops_size: 392
local_crops_size: 168
evaluation:
eval_period_iterations: 5000
bach_root: /block/eva-data/bach
breakhis_root: /block/eva-data/breakhis
pcam_root: /block/eva-data/patch_camelyon
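
The schedule arithmetic in this config is worth sanity-checking, since none of it is enforced by the YAML itself. A minimal sketch below recomputes the totals; NUM_GPUS = 8 is an assumption (this file does not state it, though the warm-start configs' comments do), and "sample" means one 448 px tile from the magnification-aware list.

```python
# Sanity-check of vitg14_reg4_highres.yaml's schedule arithmetic.
NUM_GPUS = 8  # assumption; stated only in the sibling configs' comments
batch_size_per_gpu = 6
gradient_accumulation_steps = 4
official_epoch_length = 1250
epochs = 96

effective_batch = NUM_GPUS * batch_size_per_gpu * gradient_accumulation_steps
total_steps = epochs * official_epoch_length
samples_seen = total_steps * effective_batch

print(effective_batch)      # 192
print(total_steps)          # 120000, matching the epochs comment
print(f"{samples_seen:,}")  # 23,040,000, just under one pass over the 25M tile list

# Field of view implied by the magnification-aware comment on sample_list_path:
for umpp in (1.0, 0.5, 0.25, 0.125):
    print(f"{umpp} um/px -> {448 * umpp:.0f} um across a 448 px tile")
```
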
68 changes: 68 additions & 0 deletions dinov2/configs/train/vitg14_reg4_highres_warmstart.yaml
@@ -0,0 +1,68 @@
dino:
head_n_prototypes: 131072
head_bottleneck_dim: 384
do_kde: True
kde_loss_weight: .05
koleo_loss_weight: 0
do_koleo: False
ibot:
loss_weight: 1.0
mask_sample_probability: 0.5
mask_ratio_min_max:
- 0.1
- 0.45
separate_head: true
head_n_prototypes: 131072
train:
sample_list_path: /data/rdatchane/sample_dataset_highres_25M.txt
streaming_from_hf: false
streaming_dataset_path: medarc/TCGA-12K-parquet
batch_size_per_gpu: 12
centering: sinkhorn_knopp
use_pretrained: False
teacher_checkpoint_path: /data/OpenMidnight_ckpts/openmidnight_checkpoint.pth
OFFICIAL_EPOCH_LENGTH: 1250
num_workers: 24
prefetch_factor: 8
skip_checkpointer: false
gradient_accumulation_steps: 12 # 8 GPUs x 12 batch x 12 accum = 1152 (paper)
patch_size_pixels: 448
student:
arch: vit_giant2
patch_size: 14
drop_path_rate: 0.4
ffn_layer: swiglufused
block_chunks: 4
num_register_tokens: 4
teacher:
# Warm-start the teacher schedules at their Phase 1 final values to avoid
# the loss spike + classification regression caused by re-warming from base.
momentum_teacher: 0.9995 # was 0.994; Phase 1 ended near 1.0
final_momentum_teacher: 1.0
warmup_teacher_temp: 0.07 # was inheriting 0.04 from default; no warmup
teacher_temp: 0.07
warmup_teacher_temp_epochs: 0 # was inheriting 30
optim:
epochs: 48 # 48 x 1250 = 60k optimizer steps
early_stop: 48
weight_decay: 0.2 # was inheriting 0.04 from default; start at terminal
weight_decay_end: 0.2
base_lr: 1.0e-04 # paper value; sqrt-scaled to ~1.06e-4 at eff_batch=1152
warmup_epochs: 5 # LR warmup still useful for fresh AdamW
freeze_last_layer_epochs: 0 # was inheriting 1; student head already trained
layerwise_decay: 0.9 # DINOv2 finetuning standard; was 1.0 (off)
crops:
global_crops_scale:
- 0.32
- 1.0
local_crops_number: 8
local_crops_scale:
- 0.05
- 0.32
global_crops_size: 392
local_crops_size: 168
evaluation:
eval_period_iterations: 5000
bach_root: /block/eva-data/bach
breakhis_root: /block/eva-data/breakhis
pcam_root: /block/eva-data/patch_camelyon
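
The `base_lr` comment refers to DINOv2's square-root batch-size scaling, lr = base_lr * sqrt(effective_batch / 1024). A minimal sketch of that rule, assuming (as the comment implies) that this fork counts gradient-accumulation steps toward the effective batch:

```python
import math

def scaled_lr(base_lr: float, effective_batch: int, ref_batch: int = 1024) -> float:
    """DINOv2-style square-root scaling of the base learning rate."""
    return base_lr * math.sqrt(effective_batch / ref_batch)

for eff_batch in (1152, 384):
    print(eff_batch, f"{scaled_lr(1.0e-4, eff_batch):.2e}")
# 1152 1.06e-04  <- this config (8 x 12 x 12), the "~1.06e-4" in the comment
# 384  6.12e-05  <- the single-node accum4 variants below
```
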
68 changes: 68 additions & 0 deletions dinov2/configs/train/vitg14_reg4_highres_warmstart_accum4.yaml
@@ -0,0 +1,68 @@
dino:
head_n_prototypes: 131072
head_bottleneck_dim: 384
do_kde: True
kde_loss_weight: .05
koleo_loss_weight: 0
do_koleo: False
ibot:
loss_weight: 1.0
mask_sample_probability: 0.5
mask_ratio_min_max:
- 0.1
- 0.45
separate_head: true
head_n_prototypes: 131072
train:
sample_list_path: /data/rdatchane/sample_dataset_highres_25M.txt
streaming_from_hf: false
streaming_dataset_path: medarc/TCGA-12K-parquet
batch_size_per_gpu: 12
centering: sinkhorn_knopp
use_pretrained: False
teacher_checkpoint_path: /data/OpenMidnight_ckpts/openmidnight_checkpoint.pth
OFFICIAL_EPOCH_LENGTH: 1250
num_workers: 24
prefetch_factor: 8
skip_checkpointer: false
gradient_accumulation_steps: 4 # 8 GPUs x 12 batch x 4 accum = 384 (single-node fast)
patch_size_pixels: 448
student:
arch: vit_giant2
patch_size: 14
drop_path_rate: 0.4
ffn_layer: swiglufused
block_chunks: 4
num_register_tokens: 4
teacher:
# Warm-start the teacher schedules at their Phase 1 final values to avoid
# the loss spike + classification regression caused by re-warming from base.
momentum_teacher: 0.9995 # was 0.994; Phase 1 ended near 1.0
final_momentum_teacher: 1.0
warmup_teacher_temp: 0.07 # was inheriting 0.04 from default; no warmup
teacher_temp: 0.07
warmup_teacher_temp_epochs: 0 # was inheriting 30
optim:
epochs: 24 # 24 x 1250 = 30k optimizer steps
early_stop: 24
weight_decay: 0.2 # was inheriting 0.04 from default; start at terminal
weight_decay_end: 0.2
base_lr: 1.0e-04 # paper value; at this config's eff_batch=384 the sqrt rule gives ~6.1e-5 (vs ~1.06e-4 at the paper's 1152)
warmup_epochs: 5 # LR warmup still useful for fresh AdamW
freeze_last_layer_epochs: 0 # was inheriting 1; student head already trained
layerwise_decay: 0.9 # DINOv2 finetuning standard; was 1.0 (off)
crops:
global_crops_scale:
- 0.32
- 1.0
local_crops_number: 8
local_crops_scale:
- 0.05
- 0.32
global_crops_size: 392
local_crops_size: 168
evaluation:
eval_period_iterations: 5000
bach_root: /block/eva-data/bach
breakhis_root: /block/eva-data/breakhis
pcam_root: /block/eva-data/patch_camelyon
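
The teacher block's comments rely on DINOv2 ramping the EMA momentum along a cosine schedule from `momentum_teacher` to `final_momentum_teacher`; restarting that ramp at the default 0.994 would let the teacher drift quickly away from its converged Phase 1 weights. A sketch of the ramp and the EMA update it drives, with the schedule shape taken from the upstream cosine scheduler (the scalar EMA here is illustrative; real parameters are tensors):

```python
import math

def cosine_momentum(step: int, total_steps: int,
                    base: float = 0.9995, final: float = 1.0) -> float:
    # Cosine ramp from `base` to `final` over the run (DINOv2-style schedule).
    progress = step / total_steps
    return final - (final - base) * (math.cos(math.pi * progress) + 1) / 2

def ema(teacher: float, student: float, m: float) -> float:
    """One EMA step per optimizer update: teacher <- m*teacher + (1-m)*student."""
    return m * teacher + (1 - m) * student

total = 24 * 1250  # 30k optimizer steps in this config
for step in (0, total // 2, total):
    print(step, round(cosine_momentum(step, total), 6))
# 0     0.9995   <- starts where Phase 1 ended, instead of re-warming from 0.994
# 15000 0.99975
# 30000 1.0      <- teacher effectively frozen by the end
```
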
@@ -0,0 +1,68 @@
dino:
head_n_prototypes: 131072
head_bottleneck_dim: 384
do_kde: True
kde_loss_weight: .05
koleo_loss_weight: 0
do_koleo: False
ibot:
loss_weight: 1.0
mask_sample_probability: 0.5
mask_ratio_min_max:
- 0.1
- 0.45
separate_head: true
head_n_prototypes: 131072
train:
sample_list_path: /data/rdatchane/sample_dataset_highres_25M.txt
streaming_from_hf: false
streaming_dataset_path: medarc/TCGA-12K-parquet
batch_size_per_gpu: 12
centering: sinkhorn_knopp
use_pretrained: False
teacher_checkpoint_path: /data/OpenMidnight_ckpts/openmidnight_checkpoint.pth
OFFICIAL_EPOCH_LENGTH: 1250
num_workers: 24
prefetch_factor: 8
skip_checkpointer: false
gradient_accumulation_steps: 4 # 8 GPUs x 12 batch x 4 accum = 384 (single-node fast)
patch_size_pixels: 448
student:
arch: vit_giant2
patch_size: 14
drop_path_rate: 0.2 # finetuning standard; was 0.4 (Phase 1 pretraining default)
ffn_layer: swiglufused
block_chunks: 4
num_register_tokens: 4
teacher:
# Warm-start the teacher schedules at their Phase 1 final values to avoid
# the loss spike + classification regression caused by re-warming from base.
momentum_teacher: 0.9995 # was 0.994; Phase 1 ended near 1.0
final_momentum_teacher: 1.0
warmup_teacher_temp: 0.07 # was inheriting 0.04 from default; no warmup
teacher_temp: 0.07
warmup_teacher_temp_epochs: 0 # was inheriting 30
optim:
epochs: 24 # 24 x 1250 = 30k optimizer steps
early_stop: 24
weight_decay: 0.2 # was inheriting 0.04 from default; start at terminal
weight_decay_end: 0.2
base_lr: 1.0e-04 # paper value; at this config's eff_batch=384 the sqrt rule gives ~6.1e-5 (vs ~1.06e-4 at the paper's 1152)
warmup_epochs: 5 # LR warmup still useful for fresh AdamW
freeze_last_layer_epochs: 0 # was inheriting 1; student head already trained
layerwise_decay: 0.9 # DINOv2 finetuning standard; was 1.0 (off)
crops:
global_crops_scale:
- 0.32
- 1.0
local_crops_number: 8
local_crops_scale:
- 0.05
- 0.32
global_crops_size: 392
local_crops_size: 168
evaluation:
eval_period_iterations: 5000
bach_root: /block/eva-data/bach
breakhis_root: /block/eva-data/breakhis
pcam_root: /block/eva-data/patch_camelyon
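
`layerwise_decay: 0.9` applies the standard BEiT/DINOv2 fine-tuning recipe: each parameter group's learning rate is scaled by a factor that decays geometrically with distance from the head, so early blocks (which carry the Phase 1 features) barely move. A sketch of the multiplier pattern for vit_giant2's 40 blocks; the exact grouping of the patch embedding and head may differ in this fork:

```python
decay = 0.9
num_blocks = 40  # vit_giant2 depth

# Multiplier for block i (0 = earliest) is decay ** (num_blocks - i);
# the head gets decay ** 0 = 1.0, i.e. the full scaled learning rate.
mult = [decay ** (num_blocks - i) for i in range(num_blocks + 1)]

print(f"patch embed side: {mult[0]:.2e}")   # ~1.48e-02, early layers barely move
print(f"block 20:         {mult[20]:.3f}")  # ~0.122
print(f"block 39 (last):  {mult[39]:.3f}")  # 0.900
print(f"head:             {mult[40]:.3f}")  # 1.000
```
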
68 changes: 68 additions & 0 deletions dinov2/configs/train/vitg14_reg4_highres_warmstart_short.yaml
@@ -0,0 +1,68 @@
dino:
head_n_prototypes: 131072
head_bottleneck_dim: 384
do_kde: True
kde_loss_weight: .05
koleo_loss_weight: 0
do_koleo: False
ibot:
loss_weight: 1.0
mask_sample_probability: 0.5
mask_ratio_min_max:
- 0.1
- 0.45
separate_head: true
head_n_prototypes: 131072
train:
sample_list_path: /data/rdatchane/sample_dataset_highres_25M.txt
streaming_from_hf: false
streaming_dataset_path: medarc/TCGA-12K-parquet
batch_size_per_gpu: 12
centering: sinkhorn_knopp
use_pretrained: False
teacher_checkpoint_path: /data/OpenMidnight_ckpts/openmidnight_checkpoint.pth
OFFICIAL_EPOCH_LENGTH: 1250
num_workers: 24
prefetch_factor: 8
skip_checkpointer: false
gradient_accumulation_steps: 12 # 8 GPUs x 12 batch x 12 accum = 1152 (paper)
patch_size_pixels: 448
student:
arch: vit_giant2
patch_size: 14
drop_path_rate: 0.4
ffn_layer: swiglufused
block_chunks: 4
num_register_tokens: 4
teacher:
# Warm-start the teacher schedules at their Phase 1 final values to avoid
# the loss spike + classification regression caused by re-warming from base.
momentum_teacher: 0.9995 # was 0.994; Phase 1 ended near 1.0
final_momentum_teacher: 1.0
warmup_teacher_temp: 0.07 # was inheriting 0.04 from default; no warmup
teacher_temp: 0.07
warmup_teacher_temp_epochs: 0 # was inheriting 30
optim:
epochs: 12 # 12 x 1250 = 15k optimizer steps (paper-batch short run)
early_stop: 12
weight_decay: 0.2 # was inheriting 0.04 from default; start at terminal
weight_decay_end: 0.2
base_lr: 1.0e-04 # paper value; sqrt-scaled to ~1.06e-4 at eff_batch=1152
warmup_epochs: 1 # 1 epoch = 1250 steps warmup (~8% of 15k) — schedule sized to fit
freeze_last_layer_epochs: 0 # was inheriting 1; student head already trained
layerwise_decay: 0.9 # DINOv2 finetuning standard; was 1.0 (off)
crops:
global_crops_scale:
- 0.32
- 1.0
local_crops_number: 8
local_crops_scale:
- 0.05
- 0.32
global_crops_size: 392
local_crops_size: 168
evaluation:
eval_period_iterations: 5000
bach_root: /block/eva-data/bach
breakhis_root: /block/eva-data/breakhis
pcam_root: /block/eva-data/patch_camelyon
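
The short run compresses the same warm-start into 15k steps; the "~8%" in the warmup comment is just 1250/15000. A sketch of the implied warmup-then-cosine LR shape (the cosine-to-zero tail is an assumption; the fork's final LR may differ):

```python
import math

steps_per_epoch = 1250
total_steps = 12 * steps_per_epoch   # 15,000 optimizer steps
warmup_steps = 1 * steps_per_epoch   # 1,250 steps, ~8.3% of the run
peak_lr = 1.06e-4                    # base_lr sqrt-scaled at eff_batch=1152

def lr_at(step: int) -> float:
    if step < warmup_steps:  # linear warmup from zero
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return peak_lr * (math.cos(math.pi * progress) + 1) / 2  # cosine decay to zero

for s in (0, 625, 1250, 8000, 15000):
    print(s, f"{lr_at(s):.2e}")
```
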