Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions scripts/solo_learn/barlow/repo_setting/eval_solo_learn.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the ImageNet config mocov2plus.yaml.
# NOTE(review): this script sits under barlow/ but its job name and config
# refer to MoCo; confirm that is intended.

# Queue and identity.
#SBATCH --job-name="eval_moco"
#SBATCH --partition=a40
#SBATCH --qos=a40_arashaf_genssl
#SBATCH --time=24:00:00

# Resources: one node with four A40 GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:a40:4
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/imagenet/
  --config-name mocov2plus.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
30 changes: 30 additions & 0 deletions scripts/solo_learn/barlow/repo_setting/eval_solo_learn_synth.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the config barwol_synth.yaml from
# scripts/linear/imagenet/paper_setting.
# NOTE(review): "barwol_synth.yaml" may be a misspelling of "barlow_synth.yaml",
# the job name mentions MoCo, and this repo_setting script reads a
# paper_setting config; verify all three against the repository.

# Queue and identity.
#SBATCH --job-name="eval_synth_moco"
#SBATCH --partition=a40
#SBATCH --qos=a40_arashaf_genssl
#SBATCH --time=24:00:00

# Resources: one node with four A40 GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:a40:4
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/imagenet/paper_setting
  --config-name barwol_synth.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn/barlow/repo_setting/train_solo_learn.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_pretrain.py through
# torchrun (4 processes) with the ImageNet pretraining config barlow.yaml.

# Queue and identity.
#SBATCH --job-name="barlow_train"
#SBATCH --partition=a40
#SBATCH --qos=a40_arashaf_genssl
#SBATCH --time=96:00:00

# Resources: one node with four A40 GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:a40:4
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-%j.out
#SBATCH --error=singlenode-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/pretrain/imagenet/
  --config-name barlow.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_pretrain.py "${config_opts[@]}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_pretrain.py through
# torchrun (4 processes) with the ImageNet pretraining config
# barlow_all_synthetic_icgan.yaml, then submits
# scripts/solo_learn/barlow/repo_setting/train_synthetic_solo_learn.slrm
# as a follow-up job (presumably this script itself; confirm).

#SBATCH --job-name="barlow_synth_train"
#SBATCH --partition=a40
#SBATCH --qos=a40_arashaf_genssl
#SBATCH --nodes=1
#SBATCH --gres=gpu:a40:4
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --mem=0
#SBATCH --output=singlenode-%j.out
#SBATCH --error=singlenode-%j.err
#SBATCH --open-mode=append
#SBATCH --wait-all-nodes=1
#SBATCH --time=96:00:00

# load virtual environment
source /ssd003/projects/aieng/envs/genssl3/bin/activate

export NCCL_IB_DISABLE=1 # Our cluster does not have InfiniBand. We need to disable usage using this flag.
export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
# export CUDA_LAUNCH_BLOCKING=1

# Put the current working directory on Python's module search path.
export PYTHONPATH="."

# Print GPU status into the job output.
nvidia-smi

torchrun --nproc-per-node=4 --nnodes=1 solo-learn/main_pretrain.py \
--config-path scripts/pretrain/imagenet/ \
--config-name barlow_all_synthetic_icgan.yaml

# torchrun runs in the foreground, so no `wait` is needed before chaining.
# Submit the follow-up job only when the repository directory can be entered;
# the script path below is relative, so after a failed `cd` it would resolve
# against the wrong directory. On failure `cd` reports the error and the job
# ends with a non-zero status.
# NOTE(review): the follow-up is submitted whether or not torchrun succeeded;
# confirm that is intended.
cd ~/projects/GenerativeSSL \
  && sbatch scripts/solo_learn/barlow/repo_setting/train_synthetic_solo_learn.slrm
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/barlow.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config barlow.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name barlow.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/barlow_diff.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config barlow_diff.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name barlow_diff.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/barlow_icgan.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config barlow_icgan.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name barlow_icgan.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/byol.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config byol.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name byol.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/byol_diff.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config byol_diff.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name byol_diff.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/byol_icgan.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config byol_icgan.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name byol_icgan.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/moco.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config moco.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name moco.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
29 changes: 29 additions & 0 deletions scripts/solo_learn_dt/cifar10/moco_diff.slrm
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# Single-node SLURM batch job: launches solo-learn/main_linear.py through
# torchrun (4 processes) with the CIFAR-10 config moco_diff.yaml.
# NOTE(review): the allocation asks for 2 tasks per node while torchrun starts
# 4 processes; confirm the CPU request matches what the workers need.

# Queue and identity.
#SBATCH --job-name="cifar_single"
#SBATCH --qos=m2
#SBATCH --time=4:00:00

# Resources: one node with four GPUs.
#SBATCH --nodes=1
#SBATCH --wait-all-nodes=1
#SBATCH --gres=gpu:4
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=8
#SBATCH --mem=0

# Logging: stdout/stderr files, opened in append mode.
#SBATCH --output=singlenode-eval-%j.out
#SBATCH --error=singlenode-eval-%j.err
#SBATCH --open-mode=append

# Activate the shared Python virtual environment.
. /ssd003/projects/aieng/envs/genssl3/bin/activate

# PYTHONPATH=".": put the current working directory on Python's module path.
# TORCH_NCCL_ASYNC_ERROR_HANDLING=1: set to 1 for the NCCL backend.
# NCCL_IB_DISABLE=1: the cluster has no InfiniBand, so NCCL must not use it.
export PYTHONPATH="." TORCH_NCCL_ASYNC_ERROR_HANDLING=1 NCCL_IB_DISABLE=1
# export CUDA_LAUNCH_BLOCKING=1

# Print GPU status into the job output.
nvidia-smi

launcher_opts=(--nproc-per-node=4 --nnodes=1)
config_opts=(
  --config-path scripts/linear/cifar10/
  --config-name moco_diff.yaml
)
torchrun "${launcher_opts[@]}" solo-learn/main_linear.py "${config_opts[@]}"
Loading