diff --git a/scripts/eval_scripts/CIFAR10/baseline.slrm b/scripts/eval_scripts/CIFAR10/baseline.slrm
deleted file mode 100644
index 3ecb7a4..0000000
--- a/scripts/eval_scripts/CIFAR10/baseline.slrm
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="cifar"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=slurm-cifar10_baseline_160_%j.out
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_baseline_seed43_bs128_rforig_2024-03-05-12-27/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="cifar10" \
-    --num_classes=10
diff --git a/scripts/eval_scripts/CIFAR10/icgan.slrm b/scripts/eval_scripts/CIFAR10/icgan.slrm
deleted file mode 100644
index f4bf503..0000000
--- a/scripts/eval_scripts/CIFAR10/icgan.slrm
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="cifar"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=slurm-cifar10_baseline_160_%j.out
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_icgan_seed43_bs128_rforig_2024-03-05-12-52/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="cifar10" \
-    --num_classes=10
diff --git a/scripts/eval_scripts/CIFAR10/stablediff.slrm b/scripts/eval_scripts/CIFAR10/stablediff.slrm
deleted file mode 100644
index 64361fb..0000000
--- a/scripts/eval_scripts/CIFAR10/stablediff.slrm
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="cifar"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=slurm-cifar10_baseline_160_%j.out
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_stablediff_p0p5_seed43_2024-03-05-13-39/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="cifar10" \
-    --num_classes=10
diff --git a/scripts/eval_scripts/CIFAR100/baseline.slrm b/scripts/eval_scripts/CIFAR100/baseline.slrm
deleted file mode 100644
index a68be76..0000000
--- a/scripts/eval_scripts/CIFAR100/baseline.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="cifar"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=cifar100_baseline_160_%j.out
-#SBATCH --error=cifar100_baseline_160_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_baseline_seed43_bs128_rforig_2024-03-05-12-27/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="cifar100" \
-    --num_classes=100
\ No newline at end of file
diff --git a/scripts/eval_scripts/CIFAR100/icgan.slrm b/scripts/eval_scripts/CIFAR100/icgan.slrm
deleted file mode 100644
index 98a2125..0000000
--- a/scripts/eval_scripts/CIFAR100/icgan.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="cifar"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=cifar100_baseline_160_%j.out
-#SBATCH --error=cifar100_baseline_160_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_icgan_seed43_bs128_rforig_2024-03-05-12-52/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="cifar100" \
-    --num_classes=100
\ No newline at end of file
diff --git a/scripts/eval_scripts/CIFAR100/stablediff.slrm b/scripts/eval_scripts/CIFAR100/stablediff.slrm
deleted file mode 100644
index 9f6d928..0000000
--- a/scripts/eval_scripts/CIFAR100/stablediff.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="cifar"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=cifar100_baseline_160_%j.out
-#SBATCH --error=cifar100_baseline_160_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_stablediff_p0p5_seed43_2024-03-05-13-39/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="cifar100" \
-    --num_classes=100
\ No newline at end of file
diff --git a/scripts/eval_scripts/INaturalist/baseline.slrm b/scripts/eval_scripts/INaturalist/baseline.slrm
deleted file mode 100644
index e68bef7..0000000
--- a/scripts/eval_scripts/INaturalist/baseline.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="inaturalist"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=inaturalist_baseline_%j.out
-#SBATCH --error=inaturalist_baseline_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/datasets/inat_comp/2018/" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_baseline_seed43_bs128_rforig_2024-03-05-12-27/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="INaturalist" \
-    --num_classes=8142
\ No newline at end of file
diff --git a/scripts/eval_scripts/INaturalist/icgan.slrm b/scripts/eval_scripts/INaturalist/icgan.slrm
deleted file mode 100644
index 2341e6f..0000000
--- a/scripts/eval_scripts/INaturalist/icgan.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="inaturalist"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=inaturalist_baseline_%j.out
-#SBATCH --error=inaturalist_baseline_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/datasets/inat_comp/2018/" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_icgan_seed43_bs128_rforig_2024-03-05-12-52/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="INaturalist" \
-    --num_classes=8142
\ No newline at end of file
diff --git a/scripts/eval_scripts/INaturalist/stablediff.slrm b/scripts/eval_scripts/INaturalist/stablediff.slrm
deleted file mode 100644
index 29f1159..0000000
--- a/scripts/eval_scripts/INaturalist/stablediff.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="inaturalist"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=inaturalist_baseline_%j.out
-#SBATCH --error=inaturalist_baseline_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/datasets/inat_comp/2018/" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_stablediff_p0p5_seed43_2024-03-05-13-39/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="INaturalist" \
-    --num_classes=8142
\ No newline at end of file
diff --git a/scripts/eval_scripts/food101/baseline.slrm b/scripts/eval_scripts/food101/baseline.slrm
deleted file mode 100644
index f9f5fdf..0000000
--- a/scripts/eval_scripts/food101/baseline.slrm
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="food101"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=slurm-food101_baseline_160_%j.out
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_baseline_seed43_bs128_rforig_2024-03-05-12-27/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="food101" \
-    --num_classes=101
\ No newline at end of file
diff --git a/scripts/eval_scripts/food101/icgan.slrm b/scripts/eval_scripts/food101/icgan.slrm
deleted file mode 100644
index c31f3a5..0000000
--- a/scripts/eval_scripts/food101/icgan.slrm
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="food101"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=slurm-food101_baseline_160_%j.out
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="P/projects/imagenet_synthetic/model_checkpoints/simsiam_icgan_seed43_bs128_rforig_2024-03-05-12-52/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="food101" \
-    --num_classes=101
\ No newline at end of file
diff --git a/scripts/eval_scripts/food101/stablediff.slrm b/scripts/eval_scripts/food101/stablediff.slrm
deleted file mode 100644
index a30522b..0000000
--- a/scripts/eval_scripts/food101/stablediff.slrm
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="food101"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=slurm-food101_baseline_160_%j.out
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_stablediff_p0p5_seed43_2024-03-05-13-39/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="food101" \
-    --num_classes=101
\ No newline at end of file
diff --git a/scripts/eval_scripts/imagenet/baseline.slrm b/scripts/eval_scripts/imagenet/baseline.slrm
deleted file mode 100644
index 11417ec..0000000
--- a/scripts/eval_scripts/imagenet/baseline.slrm
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="imagenet_eval"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=imagenet_baseline_%j.out
-#SBATCH --error=imagenet_baseline_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/scratch/ssd004/datasets/imagenet256" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars --batch-size=2048 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_baseline_seed43_bs128_rforig_2024-03-05-12-27/checkpoint_0160.pth.tar" \
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT"
diff --git a/scripts/eval_scripts/imagenet/icgan.slrm b/scripts/eval_scripts/imagenet/icgan.slrm
deleted file mode 100644
index e68050d..0000000
--- a/scripts/eval_scripts/imagenet/icgan.slrm
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="imagenet_eval"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=imagenet_baseline_%j.out
-#SBATCH --error=imagenet_baseline_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/scratch/ssd004/datasets/imagenet256" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars --batch-size=2048 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_icgan_seed43_bs128_rforig_2024-03-05-12-52/checkpoint_0160.pth.tar" \
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT"
diff --git a/scripts/eval_scripts/imagenet/stablediff.slrm b/scripts/eval_scripts/imagenet/stablediff.slrm
deleted file mode 100644
index 37c85c5..0000000
--- a/scripts/eval_scripts/imagenet/stablediff.slrm
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="imagenet_eval"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=imagenet_baseline_%j.out
-#SBATCH --error=imagenet_baseline_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/scratch/ssd004/datasets/imagenet256" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars --batch-size=2048 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_stablediff_p0p5_seed43_2024-03-05-13-39/checkpoint_0160.pth.tar" \
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT"
diff --git a/scripts/eval_scripts/places365/baseline.slrm b/scripts/eval_scripts/places365/baseline.slrm
deleted file mode 100644
index a619037..0000000
--- a/scripts/eval_scripts/places365/baseline.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="places365"
-#SBATCH --partition=rtx6000
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=places365_baseline_160_%j.out
-#SBATCH --error=places365_baseline_160_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets/places365" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_baseline_seed43_bs128_rforig_2024-03-05-12-27/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="places365" \
-    --num_classes=434
\ No newline at end of file
diff --git a/scripts/eval_scripts/places365/icgan.slrm b/scripts/eval_scripts/places365/icgan.slrm
deleted file mode 100644
index 84a9317..0000000
--- a/scripts/eval_scripts/places365/icgan.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="places365"
-#SBATCH --partition=rtx6000
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=places365_baseline_160_%j.out
-#SBATCH --error=places365_baseline_160_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets/places365" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_icgan_seed43_bs128_rforig_2024-03-05-12-52/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="places365" \
-    --num_classes=434
\ No newline at end of file
diff --git a/scripts/eval_scripts/places365/stablediff.slrm b/scripts/eval_scripts/places365/stablediff.slrm
deleted file mode 100644
index 8985fae..0000000
--- a/scripts/eval_scripts/places365/stablediff.slrm
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="places365"
-#SBATCH --partition=rtx6000
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=places365_baseline_160_%j.out
-#SBATCH --error=places365_baseline_160_%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-echo $MASTER_ADDR
-echo $MASTER_PORT
-
-export PYTHONPATH="."
-nvidia-smi
-
-python simsiam/linear_eval.py \
-    --data="/projects/imagenet_synthetic/fereshteh_datasets/places365" \
-    --arch="resnet50" \
-    --multiprocessing-distributed \
-    --lars \
-    --batch-size=4096 \
-    --epochs=100 \
-    -j=16 \
-    --world-size 1 \
-    --rank 0 \
-    --pretrained="/projects/imagenet_synthetic/model_checkpoints/simsiam_stablediff_p0p5_seed43_2024-03-05-13-39/checkpoint_0160.pth.tar"\
-    --dist-url "tcp://$MASTER_ADDR:$MASTER_PORT" \
-    --dataset_name="places365" \
-    --num_classes=434
\ No newline at end of file
diff --git a/scripts/generation_scripts/gen_img_icgan.slrm b/scripts/generation_scripts/gen_img_icgan.slrm
index 6741e60..2e9e29e 100644
--- a/scripts/generation_scripts/gen_img_icgan.slrm
+++ b/scripts/generation_scripts/gen_img_icgan.slrm
@@ -14,7 +14,7 @@
 PY_ARGS=${@:1}
 
 # activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
+source YOUR_VENV_PATH/bin/activate
 
 export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
 export CUDA_LAUNCH_BLOCKING=1
@@ -26,7 +26,7 @@ export PYTHONPATH="."
 nvidia-smi
 
 srun python data_generation/img2img_icgan.py \
---outdir /projects/imagenet_synthetic/synthetic_icgan \
+--outdir SAVE_DIR \
 --num_shards=7 \
 --shard_index=2 \
 --image_version=1 \
diff --git a/scripts/generation_scripts/gen_img_stablediff.slrm b/scripts/generation_scripts/gen_img_stablediff.slrm
index 6113e05..87852af 100644
--- a/scripts/generation_scripts/gen_img_stablediff.slrm
+++ b/scripts/generation_scripts/gen_img_stablediff.slrm
@@ -14,7 +14,7 @@
 PY_ARGS=${@:1}
 
 # activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
+source YOUR_VENV_PATH/bin/activate
 
 export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
 export CUDA_LAUNCH_BLOCKING=1
@@ -26,7 +26,7 @@ export PYTHONPATH="."
 nvidia-smi
 
 srun python data_generation/img2img_stable_diff.py \
---outdir /projects/imagenet_synthetic/arashaf_stablediff_batched \
+--outdir SAVE_DIR \
 --num_shards=7 \
 --shard_index=2 \
 --image_version=1 \
diff --git a/scripts/solo_learn/eval_solo_learn.slrm b/scripts/solo_learn/eval_solo_learn.slrm
index 51a5731..0a3666b 100644
--- a/scripts/solo_learn/eval_solo_learn.slrm
+++ b/scripts/solo_learn/eval_solo_learn.slrm
@@ -1,8 +1,7 @@
 #!/bin/bash
 
-#SBATCH --job-name="eval_simsiam_single"
-#SBATCH --partition=a40
-#SBATCH --qos=a40_arashaf
+#SBATCH --job-name="eval_simclr_single"
+#SBATCH --qos=m
 #SBATCH --nodes=1
 #SBATCH --gres=gpu:a40:4
 #SBATCH --ntasks-per-node=4
@@ -15,7 +14,7 @@
 #SBATCH --time=12:00:00
 
 # load virtual environment
-source /ssd003/projects/aieng/envs/genssl3/bin/activate
+source YOUR_VENV_PATH/bin/activate
 
 export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
 export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
diff --git a/scripts/solo_learn/train_solo_learn.slrm b/scripts/solo_learn/train_solo_learn.slrm
index fbe9102..9ac8d76 100644
--- a/scripts/solo_learn/train_solo_learn.slrm
+++ b/scripts/solo_learn/train_solo_learn.slrm
@@ -1,8 +1,7 @@
 #!/bin/bash
 
 #SBATCH --job-name="simclr_single_train"
-#SBATCH --partition=a40
-#SBATCH --qos=a40_arashaf
+#SBATCH --qos=m
 #SBATCH --nodes=1
 #SBATCH --gres=gpu:a40:4
 #SBATCH --ntasks-per-node=4
@@ -15,7 +14,7 @@
 #SBATCH --time=72:00:00
 
 # load virtual environment
-source /ssd003/projects/aieng/envs/genssl3/bin/activate
+source YOUR_VENV_PATH/bin/activate
 
 export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
 export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
diff --git a/scripts/solo_learn/train_synth_solo_learn.slrm b/scripts/solo_learn/train_synth_solo_learn.slrm
deleted file mode 100644
index 4f11386..0000000
--- a/scripts/solo_learn/train_synth_solo_learn.slrm
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="simclr_single_train"
-#SBATCH --partition=a40
-#SBATCH --qos=a40_arashaf
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:a40:4
-#SBATCH --ntasks-per-node=4
-#SBATCH --cpus-per-task=8
-#SBATCH --mem=0
-#SBATCH --output=singlenode-%j.out
-#SBATCH --error=singlenode-%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=72:00:00
-
-# load virtual environment
-source /ssd003/projects/aieng/envs/genssl3/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-
-export PYTHONPATH="."
-nvidia-smi
-
-torchrun --nproc-per-node=4 --nnodes=1 solo-learn/main_pretrain.py \
-    --config-path scripts/pretrain/imagenet/ \
-    --config-name simclr_synthetic.yaml
\ No newline at end of file
diff --git a/scripts/train_scrpits/train_simsiam_multinode.slrm b/scripts/train_scrpits/train_simsiam_multinode.slrm
deleted file mode 100644
index 0d4d55c..0000000
--- a/scripts/train_scrpits/train_simsiam_multinode.slrm
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="simsiam_multi_train"
-#SBATCH --partition=a40
-#SBATCH --account=deadline
-#SBATCH --qos=deadline
-#SBATCH --nodes=2
-#SBATCH --gres=gpu:a40:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=01:00:00
-#SBATCH --cpus-per-task=4
-#SBATCH --mem-per-cpu=8G
-#SBATCH --output=slurm-%j.out
-#SBATCH --error=slurm-%j.err
-# load virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-
-
-export MASTER_ADDR="$(hostname --fqdn)"
-export MASTER_PORT="$(python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1])')"
-export RDVZ_ID=$RANDOM
-echo "RDZV Endpoint $MASTER_ADDR:$MASTER_PORT"
-
-export PYTHONPATH="."
-nvidia-smi
-
-srun -p $SLURM_JOB_PARTITION \
-    -c $SLURM_CPUS_ON_NODE \
-    -N $SLURM_JOB_NUM_NODES \
-    --mem=0 \
-    --gres=gpu:$SLURM_JOB_PARTITION:$SLURM_GPUS_ON_NODE \
-    bash -c 'torchrun \
-    --nproc-per-node=$SLURM_GPUS_ON_NODE \
-    --nnodes=$SLURM_JOB_NUM_NODES \
-    --rdzv-endpoint $MASTER_ADDR:$MASTER_PORT \
-    --rdzv-id $RDVZ_ID \
-    --rdzv-backend c10d \
-    simsiam/train_simsiam.py.py \
-    -a resnet50 \
-    --fix-pred-lr \
-    --distributed_mode \
-    --batch-size=128 \
-    --epochs=200 \
-    --experiment="simsiam_icgan_seed43_bs128_rforig" \
-    --resume_from_checkpoint="/projects/imagenet_synthetic/model_checkpoints/_original_simsiam/checkpoint_0099.pth.tar" \
-    --seed=43 \
-    --use_synthetic_data \
-    --synthetic_data_dir="/projects/imagenet_synthetic/synthetic_icgan" \
-    --synthetic_index_min=0 \
-    --synthetic_index_max=4 \
-    --generative_augmentation_prob=0.5'
\ No newline at end of file
diff --git a/scripts/train_scrpits/train_simsiam_singlenode.slrm b/scripts/train_scrpits/train_simsiam_singlenode.slrm
deleted file mode 100644
index 4be266e..0000000
--- a/scripts/train_scrpits/train_simsiam_singlenode.slrm
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-#SBATCH --job-name="simsiam_single_train"
-#SBATCH --partition=a40
-#SBATCH --qos=deadline
-#SBATCH --account=deadline	
-#SBATCH --nodes=1
-#SBATCH --gres=gpu:a40:4
-#SBATCH --ntasks-per-node=1
-#SBATCH --cpus-per-task=32
-#SBATCH --mem=0
-#SBATCH --output=singlenode-%j.out
-#SBATCH --error=singlenode-%j.err
-#SBATCH --open-mode=append
-#SBATCH --wait-all-nodes=1
-#SBATCH --time=12:00:00
-
-# activate virtual environment
-source /ssd003/projects/aieng/envs/genssl2/bin/activate
-
-export NCCL_IB_DISABLE=1  # Our cluster does not have InfiniBand. We need to disable usage using this flag.
-export TORCH_NCCL_ASYNC_ERROR_HANDLING=1 # set to 1 for NCCL backend
-# export CUDA_LAUNCH_BLOCKING=1
-
-export PYTHONPATH="."
-nvidia-smi
-
-torchrun --nproc-per-node=4 --nnodes=1 simsiam/train_simsiam.py \
-    -a resnet50 \
-    --fix-pred-lr \
-    --distributed_mode \
-    --batch-size=128 \
-    --epochs=100 \
-    --experiment="simsiam_stablediff_p0p5_seed43" \
-    --resume_from_checkpoint="" \
-    --seed=43 \
-    --use_synthetic_data \
-    --synthetic_data_dir="/projects/imagenet_synthetic/arashaf_stablediff_batched" \
-    --synthetic_index_min=0 \
-    --synthetic_index_max=9 \
-    --generative_augmentation_prob=0.5
\ No newline at end of file
diff --git a/simsiam/LARC.py b/simsiam/LARC.py
deleted file mode 100644
index fe41b13..0000000
--- a/simsiam/LARC.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import torch
-from torch import nn
-from torch.nn.parameter import Parameter
-
-
-class LARC(object):
-    """
-    :class:`LARC` is a pytorch implementation of both the scaling and clipping variants of LARC,
-    in which the ratio between gradient and parameter magnitudes is used to calculate an adaptive
-    local learning rate for each individual parameter. The algorithm is designed to improve
-    convergence of large batch training.
-
-    See https://arxiv.org/abs/1708.03888 for calculation of the local learning rate.
-    In practice it modifies the gradients of parameters as a proxy for modifying the learning rate
-    of the parameters. This design allows it to be used as a wrapper around any torch.optim Optimizer.
-    ```
-    model = ...
-    optim = torch.optim.Adam(model.parameters(), lr=...)
-    optim = LARC(optim)
-    ```
-    It can even be used in conjunction with apex.fp16_utils.FP16_optimizer.
-    ```
-    model = ...
-    optim = torch.optim.Adam(model.parameters(), lr=...)
-    optim = LARC(optim)
-    optim = apex.fp16_utils.FP16_Optimizer(optim)
-    ```
-    Args:
-        optimizer: Pytorch optimizer to wrap and modify learning rate for.
-        trust_coefficient: Trust coefficient for calculating the lr. See https://arxiv.org/abs/1708.03888
-        clip: Decides between clipping or scaling mode of LARC. If `clip=True` the learning rate is set to `min(optimizer_lr, local_lr)` for each parameter. If `clip=False` the learning rate is set to `local_lr*optimizer_lr`.
-        eps: epsilon kludge to help with numerical stability while calculating adaptive_lr
-    """
-
-    def __init__(self, optimizer, trust_coefficient=0.02, clip=True, eps=1e-8):
-        self.optim = optimizer
-        self.trust_coefficient = trust_coefficient
-        self.eps = eps
-        self.clip = clip
-
-    def __getstate__(self):
-        return self.optim.__getstate__()
-
-    def __setstate__(self, state):
-        self.optim.__setstate__(state)
-
-    @property
-    def state(self):
-        return self.optim.state
-
-    def __repr__(self):
-        return self.optim.__repr__()
-
-    @property
-    def param_groups(self):
-        return self.optim.param_groups
-
-    @param_groups.setter
-    def param_groups(self, value):
-        self.optim.param_groups = value
-
-    def state_dict(self):
-        return self.optim.state_dict()
-
-    def load_state_dict(self, state_dict):
-        self.optim.load_state_dict(state_dict)
-
-    def zero_grad(self):
-        self.optim.zero_grad()
-
-    def add_param_group(self, param_group):
-        self.optim.add_param_group(param_group)
-
-    def step(self):
-        with torch.no_grad():
-            weight_decays = []
-            for group in self.optim.param_groups:
-                # absorb weight decay control from optimizer
-                weight_decay = group["weight_decay"] if "weight_decay" in group else 0
-                weight_decays.append(weight_decay)
-                group["weight_decay"] = 0
-                for p in group["params"]:
-                    if p.grad is None:
-                        continue
-                    param_norm = torch.norm(p.data)
-                    grad_norm = torch.norm(p.grad.data)
-
-                    if param_norm != 0 and grad_norm != 0:
-                        # calculate adaptive lr + weight decay
-                        adaptive_lr = (
-                            self.trust_coefficient
-                            * (param_norm)
-                            / (grad_norm + param_norm * weight_decay + self.eps)
-                        )
-
-                        # clip learning rate for LARC
-                        if self.clip:
-                            # calculation of adaptive_lr so that when multiplied by lr it equals `min(adaptive_lr, lr)`
-                            adaptive_lr = min(adaptive_lr / group["lr"], 1)
-
-                        p.grad.data += weight_decay * p.data
-                        p.grad.data *= adaptive_lr
-
-        self.optim.step()
-        # return weight decay control to optimizer
-        for i, group in enumerate(self.optim.param_groups):
-            group["weight_decay"] = weight_decays[i]
diff --git a/simsiam/LICENSE b/simsiam/LICENSE
deleted file mode 100644
index 105a4fb..0000000
--- a/simsiam/LICENSE
+++ /dev/null
@@ -1,399 +0,0 @@
-Attribution-NonCommercial 4.0 International
-
-=======================================================================
-
-Creative Commons Corporation ("Creative Commons") is not a law firm and
-does not provide legal services or legal advice. Distribution of
-Creative Commons public licenses does not create a lawyer-client or
-other relationship. Creative Commons makes its licenses and related
-information available on an "as-is" basis. Creative Commons gives no
-warranties regarding its licenses, any material licensed under their
-terms and conditions, or any related information. Creative Commons
-disclaims all liability for damages resulting from their use to the
-fullest extent possible.
-
-Using Creative Commons Public Licenses
-
-Creative Commons public licenses provide a standard set of terms and
-conditions that creators and other rights holders may use to share
-original works of authorship and other material subject to copyright
-and certain other rights specified in the public license below. The
-following considerations are for informational purposes only, are not
-exhaustive, and do not form part of our licenses.
-
-     Considerations for licensors: Our public licenses are
-     intended for use by those authorized to give the public
-     permission to use material in ways otherwise restricted by
-     copyright and certain other rights. Our licenses are
-     irrevocable. Licensors should read and understand the terms
-     and conditions of the license they choose before applying it.
-     Licensors should also secure all rights necessary before
-     applying our licenses so that the public can reuse the
-     material as expected. Licensors should clearly mark any
-     material not subject to the license. This includes other CC-
-     licensed material, or material used under an exception or
-     limitation to copyright. More considerations for licensors:
-  wiki.creativecommons.org/Considerations_for_licensors
-
-     Considerations for the public: By using one of our public
-     licenses, a licensor grants the public permission to use the
-     licensed material under specified terms and conditions. If
-     the licensor's permission is not necessary for any reason--for
-     example, because of any applicable exception or limitation to
-     copyright--then that use is not regulated by the license. Our
-     licenses grant only permissions under copyright and certain
-     other rights that a licensor has authority to grant. Use of
-     the licensed material may still be restricted for other
-     reasons, including because others have copyright or other
-     rights in the material. A licensor may make special requests,
-     such as asking that all changes be marked or described.
-     Although not required by our licenses, you are encouraged to
-     respect those requests where reasonable. More_considerations
-     for the public: 
-  wiki.creativecommons.org/Considerations_for_licensees
-
-=======================================================================
-
-Creative Commons Attribution-NonCommercial 4.0 International Public
-License
-
-By exercising the Licensed Rights (defined below), You accept and agree
-to be bound by the terms and conditions of this Creative Commons
-Attribution-NonCommercial 4.0 International Public License ("Public
-License"). To the extent this Public License may be interpreted as a
-contract, You are granted the Licensed Rights in consideration of Your
-acceptance of these terms and conditions, and the Licensor grants You
-such rights in consideration of benefits the Licensor receives from
-making the Licensed Material available under these terms and
-conditions.
-
-Section 1 -- Definitions.
-
-  a. Adapted Material means material subject to Copyright and Similar
-     Rights that is derived from or based upon the Licensed Material
-     and in which the Licensed Material is translated, altered,
-     arranged, transformed, or otherwise modified in a manner requiring
-     permission under the Copyright and Similar Rights held by the
-     Licensor. For purposes of this Public License, where the Licensed
-     Material is a musical work, performance, or sound recording,
-     Adapted Material is always produced where the Licensed Material is
-     synched in timed relation with a moving image.
-
-  b. Adapter's License means the license You apply to Your Copyright
-     and Similar Rights in Your contributions to Adapted Material in
-     accordance with the terms and conditions of this Public License.
-
-  c. Copyright and Similar Rights means copyright and/or similar rights
-     closely related to copyright including, without limitation,
-     performance, broadcast, sound recording, and Sui Generis Database
-     Rights, without regard to how the rights are labeled or
-     categorized. For purposes of this Public License, the rights
-     specified in Section 2(b)(1)-(2) are not Copyright and Similar
-     Rights.
-  d. Effective Technological Measures means those measures that, in the
-     absence of proper authority, may not be circumvented under laws
-     fulfilling obligations under Article 11 of the WIPO Copyright
-     Treaty adopted on December 20, 1996, and/or similar international
-     agreements.
-
-  e. Exceptions and Limitations means fair use, fair dealing, and/or
-     any other exception or limitation to Copyright and Similar Rights
-     that applies to Your use of the Licensed Material.
-
-  f. Licensed Material means the artistic or literary work, database,
-     or other material to which the Licensor applied this Public
-     License.
-
-  g. Licensed Rights means the rights granted to You subject to the
-     terms and conditions of this Public License, which are limited to
-     all Copyright and Similar Rights that apply to Your use of the
-     Licensed Material and that the Licensor has authority to license.
-
-  h. Licensor means the individual(s) or entity(ies) granting rights
-     under this Public License.
-
-  i. NonCommercial means not primarily intended for or directed towards
-     commercial advantage or monetary compensation. For purposes of
-     this Public License, the exchange of the Licensed Material for
-     other material subject to Copyright and Similar Rights by digital
-     file-sharing or similar means is NonCommercial provided there is
-     no payment of monetary compensation in connection with the
-     exchange.
-
-  j. Share means to provide material to the public by any means or
-     process that requires permission under the Licensed Rights, such
-     as reproduction, public display, public performance, distribution,
-     dissemination, communication, or importation, and to make material
-     available to the public including in ways that members of the
-     public may access the material from a place and at a time
-     individually chosen by them.
-
-  k. Sui Generis Database Rights means rights other than copyright
-     resulting from Directive 96/9/EC of the European Parliament and of
-     the Council of 11 March 1996 on the legal protection of databases,
-     as amended and/or succeeded, as well as other essentially
-     equivalent rights anywhere in the world.
-
-  l. You means the individual or entity exercising the Licensed Rights
-     under this Public License. Your has a corresponding meaning.
-
-Section 2 -- Scope.
-
-  a. License grant.
-
-       1. Subject to the terms and conditions of this Public License,
-          the Licensor hereby grants You a worldwide, royalty-free,
-          non-sublicensable, non-exclusive, irrevocable license to
-          exercise the Licensed Rights in the Licensed Material to:
-
-            a. reproduce and Share the Licensed Material, in whole or
-               in part, for NonCommercial purposes only; and
-
-            b. produce, reproduce, and Share Adapted Material for
-               NonCommercial purposes only.
-
-       2. Exceptions and Limitations. For the avoidance of doubt, where
-          Exceptions and Limitations apply to Your use, this Public
-          License does not apply, and You do not need to comply with
-          its terms and conditions.
-
-       3. Term. The term of this Public License is specified in Section
-          6(a).
-
-       4. Media and formats; technical modifications allowed. The
-          Licensor authorizes You to exercise the Licensed Rights in
-          all media and formats whether now known or hereafter created,
-          and to make technical modifications necessary to do so. The
-          Licensor waives and/or agrees not to assert any right or
-          authority to forbid You from making technical modifications
-          necessary to exercise the Licensed Rights, including
-          technical modifications necessary to circumvent Effective
-          Technological Measures. For purposes of this Public License,
-          simply making modifications authorized by this Section 2(a)
-          (4) never produces Adapted Material.
-
-       5. Downstream recipients.
-
-            a. Offer from the Licensor -- Licensed Material. Every
-               recipient of the Licensed Material automatically
-               receives an offer from the Licensor to exercise the
-               Licensed Rights under the terms and conditions of this
-               Public License.
-
-            b. No downstream restrictions. You may not offer or impose
-               any additional or different terms or conditions on, or
-               apply any Effective Technological Measures to, the
-               Licensed Material if doing so restricts exercise of the
-               Licensed Rights by any recipient of the Licensed
-               Material.
-
-       6. No endorsement. Nothing in this Public License constitutes or
-          may be construed as permission to assert or imply that You
-          are, or that Your use of the Licensed Material is, connected
-          with, or sponsored, endorsed, or granted official status by,
-          the Licensor or others designated to receive attribution as
-          provided in Section 3(a)(1)(A)(i).
-
-  b. Other rights.
-
-       1. Moral rights, such as the right of integrity, are not
-          licensed under this Public License, nor are publicity,
-          privacy, and/or other similar personality rights; however, to
-          the extent possible, the Licensor waives and/or agrees not to
-          assert any such rights held by the Licensor to the limited
-          extent necessary to allow You to exercise the Licensed
-          Rights, but not otherwise.
-
-       2. Patent and trademark rights are not licensed under this
-          Public License.
-
-       3. To the extent possible, the Licensor waives any right to
-          collect royalties from You for the exercise of the Licensed
-          Rights, whether directly or through a collecting society
-          under any voluntary or waivable statutory or compulsory
-          licensing scheme. In all other cases the Licensor expressly
-          reserves any right to collect such royalties, including when
-          the Licensed Material is used other than for NonCommercial
-          purposes.
-
-Section 3 -- License Conditions.
-
-Your exercise of the Licensed Rights is expressly made subject to the
-following conditions.
-
-  a. Attribution.
-
-       1. If You Share the Licensed Material (including in modified
-          form), You must:
-
-            a. retain the following if it is supplied by the Licensor
-               with the Licensed Material:
-
-                 i. identification of the creator(s) of the Licensed
-                    Material and any others designated to receive
-                    attribution, in any reasonable manner requested by
-                    the Licensor (including by pseudonym if
-                    designated);
-
-                ii. a copyright notice;
-
-               iii. a notice that refers to this Public License;
-
-                iv. a notice that refers to the disclaimer of
-                    warranties;
-
-                 v. a URI or hyperlink to the Licensed Material to the
-                    extent reasonably practicable;
-
-            b. indicate if You modified the Licensed Material and
-               retain an indication of any previous modifications; and
-
-            c. indicate the Licensed Material is licensed under this
-               Public License, and include the text of, or the URI or
-               hyperlink to, this Public License.
-
-       2. You may satisfy the conditions in Section 3(a)(1) in any
-          reasonable manner based on the medium, means, and context in
-          which You Share the Licensed Material. For example, it may be
-          reasonable to satisfy the conditions by providing a URI or
-          hyperlink to a resource that includes the required
-          information.
-
-       3. If requested by the Licensor, You must remove any of the
-          information required by Section 3(a)(1)(A) to the extent
-          reasonably practicable.
-
-       4. If You Share Adapted Material You produce, the Adapter's
-          License You apply must not prevent recipients of the Adapted
-          Material from complying with this Public License.
-
-Section 4 -- Sui Generis Database Rights.
-
-Where the Licensed Rights include Sui Generis Database Rights that
-apply to Your use of the Licensed Material:
-
-  a. for the avoidance of doubt, Section 2(a)(1) grants You the right
-     to extract, reuse, reproduce, and Share all or a substantial
-     portion of the contents of the database for NonCommercial purposes
-     only;
-
-  b. if You include all or a substantial portion of the database
-     contents in a database in which You have Sui Generis Database
-     Rights, then the database in which You have Sui Generis Database
-     Rights (but not its individual contents) is Adapted Material; and
-
-  c. You must comply with the conditions in Section 3(a) if You Share
-     all or a substantial portion of the contents of the database.
-
-For the avoidance of doubt, this Section 4 supplements and does not
-replace Your obligations under this Public License where the Licensed
-Rights include other Copyright and Similar Rights.
-
-Section 5 -- Disclaimer of Warranties and Limitation of Liability.
-
-  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
-     EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
-     AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
-     ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
-     IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
-     WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
-     PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
-     ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
-     KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
-     ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
-
-  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
-     TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
-     NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
-     INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
-     COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
-     USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
-     ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
-     DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
-     IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
-
-  c. The disclaimer of warranties and limitation of liability provided
-     above shall be interpreted in a manner that, to the extent
-     possible, most closely approximates an absolute disclaimer and
-     waiver of all liability.
-
-Section 6 -- Term and Termination.
-
-  a. This Public License applies for the term of the Copyright and
-     Similar Rights licensed here. However, if You fail to comply with
-     this Public License, then Your rights under this Public License
-     terminate automatically.
-
-  b. Where Your right to use the Licensed Material has terminated under
-     Section 6(a), it reinstates:
-
-       1. automatically as of the date the violation is cured, provided
-          it is cured within 30 days of Your discovery of the
-          violation; or
-
-       2. upon express reinstatement by the Licensor.
-
-     For the avoidance of doubt, this Section 6(b) does not affect any
-     right the Licensor may have to seek remedies for Your violations
-     of this Public License.
-
-  c. For the avoidance of doubt, the Licensor may also offer the
-     Licensed Material under separate terms or conditions or stop
-     distributing the Licensed Material at any time; however, doing so
-     will not terminate this Public License.
-
-  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
-     License.
-
-Section 7 -- Other Terms and Conditions.
-
-  a. The Licensor shall not be bound by any additional or different
-     terms or conditions communicated by You unless expressly agreed.
-
-  b. Any arrangements, understandings, or agreements regarding the
-     Licensed Material not stated herein are separate from and
-     independent of the terms and conditions of this Public License.
-
-Section 8 -- Interpretation.
-
-  a. For the avoidance of doubt, this Public License does not, and
-     shall not be interpreted to, reduce, limit, restrict, or impose
-     conditions on any use of the Licensed Material that could lawfully
-     be made without permission under this Public License.
-
-  b. To the extent possible, if any provision of this Public License is
-     deemed unenforceable, it shall be automatically reformed to the
-     minimum extent necessary to make it enforceable. If the provision
-     cannot be reformed, it shall be severed from this Public License
-     without affecting the enforceability of the remaining terms and
-     conditions.
-
-  c. No term or condition of this Public License will be waived and no
-     failure to comply consented to unless expressly agreed to by the
-     Licensor.
-
-  d. Nothing in this Public License constitutes or may be interpreted
-     as a limitation upon, or waiver of, any privileges and immunities
-     that apply to the Licensor or You, including from the legal
-     processes of any jurisdiction or authority.
-
-=======================================================================
-
-Creative Commons is not a party to its public
-licenses. Notwithstanding, Creative Commons may elect to apply one of
-its public licenses to material it publishes and in those instances
-will be considered the “Licensor.” The text of the Creative Commons
-public licenses is dedicated to the public domain under the CC0 Public
-Domain Dedication. Except for the limited purpose of indicating that
-material is shared under a Creative Commons public license or as
-otherwise permitted by the Creative Commons policies published at
-creativecommons.org/policies, Creative Commons does not authorize the
-use of the trademark "Creative Commons" or any other trademark or logo
-of Creative Commons without its prior written consent including,
-without limitation, in connection with any unauthorized modifications
-to any of its public licenses or any other arrangements,
-understandings, or agreements concerning use of licensed material. For
-the avoidance of doubt, this paragraph does not form part of the
-public licenses.
-
-Creative Commons may be contacted at creativecommons.org.
\ No newline at end of file
diff --git a/simsiam/README.md b/simsiam/README.md
deleted file mode 100644
index 47bab1b..0000000
--- a/simsiam/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-# SimSiam: Exploring Simple Siamese Representation Learning
-
-<p align="center">
-    <img width="400" alt="simsiam" src="https://user-images.githubusercontent.com/2420753/118343499-4c410100-b4de-11eb-9313-d49e65440a7e.png">
-</p>
-
-This is a PyTorch implementation of the [SimSiam paper](https://arxiv.org/abs/2011.10566):
-```
-@Article{chen2020simsiam,
-  author  = {Xinlei Chen and Kaiming He},
-  title   = {Exploring Simple Siamese Representation Learning},
-  journal = {arXiv preprint arXiv:2011.10566},
-  year    = {2020},
-}
-```
-
-### Preparation
-
-Install PyTorch and download the ImageNet dataset following the [official PyTorch ImageNet training code](https://github.com/pytorch/examples/tree/master/imagenet). Similar to [MoCo](https://github.com/facebookresearch/moco), the code release contains minimal modifications for both unsupervised pre-training and linear classification to that code. 
-
-In addition, install [apex](https://github.com/NVIDIA/apex) for the [LARS](https://github.com/NVIDIA/apex/blob/master/apex/parallel/LARC.py) implementation needed for linear classification.
-
-### Unsupervised Pre-Training
-
-Only **multi-gpu**, **DistributedDataParallel** training is supported; single-gpu or DataParallel training is not supported.
-
-To do unsupervised pre-training of a ResNet-50 model on ImageNet in an 8-gpu machine, run:
-```
-python main_simsiam.py \
-  -a resnet50 \
-  --dist-url 'tcp://localhost:10001' --multiprocessing-distributed --world-size 1 --rank 0 \
-  --fix-pred-lr \
-  [your imagenet-folder with train and val folders]
-```
-The script uses all the default hyper-parameters as described in the paper, and uses the default augmentation recipe from [MoCo v2](https://arxiv.org/abs/2003.04297). 
-
-The above command performs pre-training with a non-decaying predictor learning rate for 100 epochs, corresponding to the last row of Table 1 in the paper. 
-
-### Linear Classification
-
-With a pre-trained model, to train a supervised linear classifier on frozen features/weights in an 8-gpu machine, run:
-```
-python main_lincls.py \
-  -a resnet50 \
-  --dist-url 'tcp://localhost:10001' --multiprocessing-distributed --world-size 1 --rank 0 \
-  --pretrained [your checkpoint path]/checkpoint_0099.pth.tar \
-  --lars \
-  [your imagenet-folder with train and val folders]
-```
-
-The above command uses LARS optimizer and a default batch size of 4096.
-
-### Models and Logs
-
-Our pre-trained ResNet-50 models and logs:
-<table><tbody>
-<!-- START TABLE -->
-<!-- TABLE HEADER -->
-<th valign="bottom">pre-train<br/>epochs</th>
-<th valign="bottom">batch<br/>size</th>
-<th valign="bottom">pre-train<br/>ckpt</th>
-<th valign="bottom">pre-train<br/>log</th>
-<th valign="bottom">linear cls.<br/>ckpt</th>
-<th valign="bottom">linear cls.<br/>log</th>
-<th valign="center">top-1 acc.</th>
-<!-- TABLE BODY -->
-<tr>
-<td align="center">100</td>
-<td align="center">512</td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/models/100ep/pretrain/checkpoint_0099.pth.tar">link</a></td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/logs/100ep/pretrain.log">link</a></td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/models/100ep/linear/model_best.pth.tar">link</a></td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/logs/100ep/linear.log">link</a></td>
-<td align="center">68.1</td>
-</tr>
-<tr>
-<td align="center">100</td>
-<td align="center">256</td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/models/100ep-256bs/pretrain/checkpoint_0099.pth.tar">link</a></td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/logs/100ep-256bs/pretrain.log">link</a></td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/models/100ep-256bs/linear/model_best.pth.tar">link</a></td>
-<td align="center"><a href="https://dl.fbaipublicfiles.com/simsiam/logs/100ep-256bs/linear.log">link</a></td>
-<td align="center">68.3</td>
-</tr>
-</tbody></table>
-
-Settings for the above: 8 NVIDIA V100 GPUs, CUDA 10.1/CuDNN 7.6.5, PyTorch 1.7.0.
-
-### Transferring to Object Detection
-
-Same as [MoCo](https://github.com/facebookresearch/moco) for object detection transfer, please see [moco/detection](https://github.com/facebookresearch/moco/tree/master/detection).
-
-
-### License
-
-This project is under the CC-BY-NC 4.0 license. See [LICENSE](LICENSE) for details.
\ No newline at end of file
diff --git a/simsiam/__init__.py b/simsiam/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/simsiam/builder.py b/simsiam/builder.py
deleted file mode 100644
index 423af89..0000000
--- a/simsiam/builder.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import torch
-import torch.nn as nn
-
-
-class SimSiam(nn.Module):
-    """
-    Build a SimSiam model.
-    """
-
-    def __init__(self, base_encoder, dim=2048, pred_dim=512):
-        """
-        dim: feature dimension (default: 2048)
-        pred_dim: hidden dimension of the predictor (default: 512)
-        """
-        super(SimSiam, self).__init__()
-
-        # create the encoder
-        # num_classes is the output fc dimension, zero-initialize last BNs
-        self.encoder = base_encoder(num_classes=dim, zero_init_residual=True)
-
-        # build a 3-layer projector
-        prev_dim = self.encoder.fc.weight.shape[1]
-        self.encoder.fc = nn.Sequential(
-            nn.Linear(prev_dim, prev_dim, bias=False),
-            nn.BatchNorm1d(prev_dim),
-            nn.ReLU(inplace=True),  # first layer
-            nn.Linear(prev_dim, prev_dim, bias=False),
-            nn.BatchNorm1d(prev_dim),
-            nn.ReLU(inplace=True),  # second layer
-            self.encoder.fc,
-            nn.BatchNorm1d(dim, affine=False),
-        )  # output layer
-        self.encoder.fc[
-            6
-        ].bias.requires_grad = False  # hack: not use bias as it is followed by BN
-
-        # build a 2-layer predictor
-        self.predictor = nn.Sequential(
-            nn.Linear(dim, pred_dim, bias=False),
-            nn.BatchNorm1d(pred_dim),
-            nn.ReLU(inplace=True),  # hidden layer
-            nn.Linear(pred_dim, dim),
-        )  # output layer
-
-    def forward(self, x1, x2):
-        """
-        Input:
-            x1: first views of images
-            x2: second views of images
-        Output:
-            p1, p2, z1, z2: predictors and targets of the network
-            See Sec. 3 of https://arxiv.org/abs/2011.10566 for detailed notations
-        """
-
-        # compute features for one view
-        z1 = self.encoder(x1)  # NxC
-        z2 = self.encoder(x2)  # NxC
-
-        p1 = self.predictor(z1)  # NxC
-        p2 = self.predictor(z2)  # NxC
-
-        return p1, p2, z1.detach(), z2.detach()
diff --git a/simsiam/distributed.py b/simsiam/distributed.py
deleted file mode 100644
index 53a555d..0000000
--- a/simsiam/distributed.py
+++ /dev/null
@@ -1,135 +0,0 @@
-"""Utilities for distributed training."""
-import os
-import subprocess
-
-import torch
-import torch.distributed as dist
-
-
-def init_distributed_mode(
-    launcher,
-    backend,
-) -> None:
-    """Launch distributed training based on given launcher and backend.
-
-    Parameters
-    ----------
-    launcher : {'pytorch', 'slurm'}
-        Specifies if pytorch launch utitlity (`torchrun`) is being
-        used or if running on a SLURM cluster.
-    backend : {'nccl', 'gloo', 'mpi'}
-        Specifies which backend to use when initializing a process group.
-    """
-    if launcher == "pytorch":
-        launch_pytorch_dist(backend)
-    elif launcher == "slurm":
-        launch_slurm_dist(backend)
-    else:
-        raise RuntimeError(
-            f"Invalid launcher type: {launcher}. Use 'pytorch' or 'slurm'.",
-        )
-
-
-def launch_pytorch_dist(backend) -> None:
-    """Initialize a distributed process group with PyTorch.
-
-    NOTE: This method relies on `torchrun` to set 'MASTER_ADDR',
-    'MASTER_PORT', 'RANK', 'WORLD_SIZE' and 'LOCAL_RANK' as environment variables
-
-    Parameters
-    ----------
-    backend : {'nccl', 'gloo', 'mpi'}
-        Specifies which backend to use when initializing a process group. Can be
-        one of ``"nccl"``, ``"gloo"``, or ``"mpi"``.
-    """
-    local_rank = int(os.environ["LOCAL_RANK"])
-    torch.cuda.set_device(local_rank)
-    dist.init_process_group(backend=backend, init_method="env://")
-    disable_non_master_print()  # only print in master process
-    dist.barrier()
-
-
-def launch_slurm_dist(backend) -> None:
-    """Initialize a distributed process group when using SLURM.
-
-    Parameters
-    ----------
-    backend : {'nccl', 'gloo', 'mpi'}
-        Specifies which backend to use when initializing a process group. Can be
-        one of ``"nccl"``, ``"gloo"``, or ``"mpi"``.
-    """
-    # set the MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE
-    # as environment variables before initializing the process group
-    if "MASTER_ADDR" not in os.environ:
-        node_list = os.environ["SLURM_NODELIST"]
-        os.environ["MASTER_ADDR"] = subprocess.getoutput(
-            f"scontrol show hostname {node_list} | head -n1",
-        )
-    if "MASTER_PORT" not in os.environ:
-        os.environ["MASTER_PORT"] = "29400"
-    os.environ["RANK"] = os.environ["SLURM_PROCID"]
-    os.environ["WORLD_SIZE"] = os.environ["SLURM_NTASKS"]
-
-    local_rank = int(os.environ["SLURM_LOCALID"])
-    print(f"Initializing distributed training in process {local_rank}")
-    torch.cuda.set_device(local_rank)
-    dist.init_process_group(backend=backend, init_method="env://")
-    disable_non_master_print()  # only print on master process
-    dist.barrier()
-
-
-# the following functions were adapted from:
-# https://github.com/pytorch/vision/blob/main/references/classification/utils.py
-def disable_non_master_print():
-    """Disable printing if not master process.
-
-    Notes
-    -----
-    Printing can be forced by adding a boolean flag, 'force', to the keyword arguments
-    to the print function call.
-    """
-    import builtins as __builtin__
-
-    builtin_print = __builtin__.print
-
-    def print(*args, **kwargs):  # noqa: A001
-        force = kwargs.pop("force", False)
-        if is_main_process() or force:
-            builtin_print(*args, **kwargs)
-
-    __builtin__.print = print
-
-
-def is_dist_avail_and_initialized() -> bool:
-    """Check if the distributed package is available and initialized."""
-    return dist.is_available() and dist.is_initialized()
-
-
-def get_world_size() -> int:
-    """Get the total number of processes a distributed process group.
-
-    It returns 1 if the PyTorch distributed package is unavailable or the
-    default process group has not been initialized.
-    """
-    if not is_dist_avail_and_initialized():
-        return 1
-    return dist.get_world_size()
-
-
-def get_rank() -> int:
-    """Return the global rank of the current process.
-
-    Returns 0 if the PyTorch distribued package is unavailable or the
-    default process group has not been initialized.
-    """
-    if not is_dist_avail_and_initialized():
-        return 0
-    return dist.get_rank()
-
-
-def is_main_process() -> bool:
-    """Check if the current process is the Master proces.
-
-    The master process typically has a rank of 0.
-    """
-    return not is_dist_avail_and_initialized() or get_rank() == 0
diff --git a/simsiam/inatural_dataset.py b/simsiam/inatural_dataset.py
deleted file mode 100644
index 65b689f..0000000
--- a/simsiam/inatural_dataset.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import torch.utils.data as data
-from PIL import Image
-import os
-import json
-from torchvision import transforms
-import random
-import numpy as np
-
-
-def default_loader(path):
-    return Image.open(path).convert("RGB")
-
-
-def load_taxonomy(ann_data, tax_levels, classes):
-    # loads the taxonomy data and converts to ints
-    taxonomy = {}
-
-    if "categories" in ann_data.keys():
-        num_classes = len(ann_data["categories"])
-        for tt in tax_levels:
-            tax_data = [aa[tt] for aa in ann_data["categories"]]
-            _, tax_id = np.unique(tax_data, return_inverse=True)
-            taxonomy[tt] = dict(zip(range(num_classes), list(tax_id)))
-    else:
-        # set up dummy data
-        for tt in tax_levels:
-            taxonomy[tt] = dict(zip([0], [0]))
-
-    # create a dictionary of lists containing taxonomic labels
-    classes_taxonomic = {}
-    for cc in np.unique(classes):
-        tax_ids = [0] * len(tax_levels)
-        for ii, tt in enumerate(tax_levels):
-            tax_ids[ii] = taxonomy[tt][cc]
-        classes_taxonomic[cc] = tax_ids
-
-    return taxonomy, classes_taxonomic
-
-
-class INAT(data.Dataset):
-    def __init__(self, root, ann_file, transform):
-        # load annotations
-        print("Loading annotations from: " + os.path.basename(ann_file))
-        with open(ann_file) as data_file:
-            ann_data = json.load(data_file)
-
-        # set up the filenames and annotations
-        self.imgs = [aa["file_name"] for aa in ann_data["images"]]
-        self.ids = [aa["id"] for aa in ann_data["images"]]
-
-        # if we dont have class labels set them to '0'
-        if "annotations" in ann_data.keys():
-            self.classes = [aa["category_id"] for aa in ann_data["annotations"]]
-        else:
-            self.classes = [0] * len(self.imgs)
-
-        # print out some stats
-        print("\t" + str(len(self.imgs)) + " images")
-        print("\t" + str(len(set(self.classes))) + " classes")
-
-        self.root = root
-        self.loader = default_loader
-
-        # augmentation params
-        self.transform = transform
-
-    def __getitem__(self, index):
-        path = self.root + self.imgs[index]
-        img = self.loader(path)
-        species_id = self.classes[index]
-
-        img = self.transform(img)
-
-        return img, species_id
-
-    def __len__(self):
-        return len(self.imgs)
diff --git a/simsiam/linear_eval.py b/simsiam/linear_eval.py
deleted file mode 100644
index e42c097..0000000
--- a/simsiam/linear_eval.py
+++ /dev/null
@@ -1,807 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import argparse
-import builtins
-import math
-import os
-import random
-import shutil
-import time
-import warnings
-from datetime import datetime
-
-import torch
-import torch.backends.cudnn as cudnn
-import torch.distributed as dist
-import torch.multiprocessing as mp
-import torch.nn as nn
-import torch.nn.parallel
-import torch.optim
-import torch.utils.data
-import torch.utils.data.distributed
-import torchvision.datasets as datasets
-import torchvision.models as models
-import torchvision.transforms as transforms
-from tqdm import tqdm
-from data_generation.icgan.data_utils import utils as data_utils
-
-from inatural_dataset import INAT
-
-
-model_names = sorted(
-    name
-    for name in models.__dict__
-    if name.islower() and not name.startswith("__") and callable(models.__dict__[name])
-)
-
-parser = argparse.ArgumentParser(description="PyTorch ImageNet Training")
-parser.add_argument(
-    "--data",
-    metavar="DIR",
-    default="/scratch/ssd004/datasets/imagenet256",
-    help="path to dataset.",
-)
-parser.add_argument(
-    "-a",
-    "--arch",
-    metavar="ARCH",
-    default="resnet50",
-    choices=model_names,
-    help="model architecture: " + " | ".join(model_names) + " (default: resnet50)",
-)
-parser.add_argument(
-    "-j",
-    "--workers",
-    default=4,
-    type=int,
-    metavar="N",
-    help="number of data loading workers (default: 32)",
-)
-parser.add_argument(
-    "--epochs", default=90, type=int, metavar="N", help="number of total epochs to run"
-)
-parser.add_argument(
-    "-b",
-    "--batch-size",
-    default=4096,
-    type=int,
-    metavar="N",
-    help="mini-batch size (default: 4096), this is the total "
-    "batch size of all GPUs on the current node when "
-    "using Data Parallel or Distributed Data Parallel",
-)
-parser.add_argument(
-    "--lr",
-    "--learning-rate",
-    default=0.1,
-    type=float,
-    metavar="LR",
-    help="initial (base) learning rate",
-    dest="lr",
-)
-parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
-parser.add_argument(
-    "--wd",
-    "--weight-decay",
-    default=0.0,
-    type=float,
-    metavar="W",
-    help="weight decay (default: 0.)",
-    dest="weight_decay",
-)
-parser.add_argument(
-    "-p",
-    "--print-freq",
-    default=10,
-    type=int,
-    metavar="N",
-    help="print frequency (default: 10)",
-)
-parser.add_argument(
-    "--resume",
-    default="",
-    type=str,
-    metavar="PATH",
-    help="path to latest checkpoint (default: none)",
-)
-parser.add_argument(
-    "-e",
-    "--evaluate",
-    dest="evaluate",
-    action="store_true",
-    help="evaluate model on validation set",
-)
-parser.add_argument(
-    "--world-size",
-    default=-1,
-    type=int,
-    help="number of nodes for distributed training",
-)
-parser.add_argument(
-    "--rank", default=-1, type=int, help="node rank for distributed training"
-)
-parser.add_argument(
-    "--dist-url",
-    default="tcp://224.66.41.62:23456",
-    type=str,
-    help="url used to set up distributed training",
-)
-parser.add_argument(
-    "--dist-backend", default="nccl", type=str, help="distributed backend"
-)
-parser.add_argument(
-    "--seed", default=None, type=int, help="seed for initializing training. "
-)
-parser.add_argument("--gpu", default=None, type=int, help="GPU id to use.")
-parser.add_argument(
-    "--multiprocessing-distributed",
-    action="store_true",
-    help="Use multi-processing distributed training to launch "
-    "N processes per node, which has N GPUs. This is the "
-    "fastest way to use PyTorch for either single node or "
-    "multi node data parallel training",
-)
-
-# additional configs:
-parser.add_argument(
-    "--pretrained", default="", type=str, help="path to simsiam pretrained checkpoint"
-)
-parser.add_argument("--lars", action="store_true", help="Use LARS")
-
-parser.add_argument("--dataset_name", default="imagenet", help="Name of the dataset.")
-
-parser.add_argument(
-    "--checkpoint_dir",
-    default="/projects/imagenet_synthetic/model_checkpoints",
-    help="Checkpoint root directory.",
-)
-
-parser.add_argument(
-    "--num_classes",
-    default=1000,
-    type=int,
-    help="Number of classes in the dataset.",
-)
-
-parser.add_argument(
-    "--ablation_mode",
-    default="icgan",
-    type=str,
-    help="Using icgan or stable diffusion feature extractor for ablation study.",
-)
-
-best_acc1 = 0
-
-
-def main():
-    args = parser.parse_args()
-    current_time = datetime.now().strftime("%Y-%m-%d-%H-%M")
-    args.checkpoint_dir = os.path.join(args.checkpoint_dir, f"eval_{current_time}")
-    os.makedirs(args.checkpoint_dir, exist_ok=True)
-
-    print(args)
-
-    if args.seed is not None:
-        random.seed(args.seed)
-        torch.manual_seed(args.seed)
-        # NOTE: this line can reduce speed considerably
-        cudnn.deterministic = True
-        warnings.warn(
-            "You have chosen to seed training. "
-            "This will turn on the CUDNN deterministic setting, "
-            "which can slow down your training considerably! "
-            "You may see unexpected behavior when restarting "
-            "from checkpoints."
-        )
-
-    if args.gpu is not None:
-        warnings.warn(
-            "You have chosen a specific GPU. This will completely "
-            "disable data parallelism."
-        )
-
-    if args.dist_url == "env://" and args.world_size == -1:
-        args.world_size = int(os.environ["WORLD_SIZE"])
-    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
-
-    ngpus_per_node = torch.cuda.device_count()
-    if args.multiprocessing_distributed:
-        # Since we have ngpus_per_node processes per node, the total world_size
-        # needs to be adjusted accordingly
-        args.world_size = ngpus_per_node * args.world_size
-        # Use torch.multiprocessing.spawn to launch distributed processes: the
-        # main_worker process function
-        mp.spawn(
-            main_worker,
-            nprocs=ngpus_per_node,
-            args=(
-                ngpus_per_node,
-                args,
-            ),
-        )
-    else:
-        # Simply call main_worker function
-        main_worker(args.gpu, ngpus_per_node, args)
-
-
-def main_worker(gpu, ngpus_per_node, args):
-    global best_acc1
-    args.gpu = gpu
-
-    # suppress printing if not master
-    if args.multiprocessing_distributed and args.gpu != 0:
-
-        def print_pass(*args, flush=True):
-            pass
-
-        builtins.print = print_pass
-
-    if args.gpu is not None:
-        print("Use GPU: {} for training".format(args.gpu), flush=True)
-
-    if args.distributed:
-        if args.dist_url == "env://" and args.rank == -1:
-            args.rank = int(os.environ["RANK"])
-        if args.multiprocessing_distributed:
-            # For multiprocessing distributed training, rank needs to be the
-            # global rank among all the processes
-            args.rank = args.rank * ngpus_per_node + gpu
-        dist.init_process_group(
-            backend=args.dist_backend,
-            init_method=args.dist_url,
-            world_size=args.world_size,
-            rank=args.rank,
-        )
-        torch.distributed.barrier()
-
-    # create model
-    print("=> creating model '{}'".format(args.arch), flush=True)
-    model = models.__dict__[args.arch]()
-
-    model.fc = nn.Linear(2048, args.num_classes)
-
-    # freeze all layers but the last fc
-    for name, param in model.named_parameters():
-        if name not in ["fc.weight", "fc.bias"]:
-            param.requires_grad = False
-    # init the fc layer
-    model.fc.weight.data.normal_(mean=0.0, std=0.01)
-    model.fc.bias.data.zero_()
-
-    # load from pre-trained, before DistributedDataParallel constructor
-    if args.pretrained:
-        if os.path.isfile(args.pretrained):
-            print("=> loading checkpoint '{}'".format(args.pretrained), flush=True)
-            checkpoint = torch.load(args.pretrained, map_location="cpu")
-
-            # rename moco pre-trained keys
-            state_dict = checkpoint["state_dict"]
-            for k in list(state_dict.keys()):
-                # retain only encoder up to before the embedding layer
-                if k.startswith("module.encoder") and not k.startswith(
-                    "module.encoder.fc"
-                ):
-                    # remove prefix
-                    state_dict[k[len("module.encoder.") :]] = state_dict[k]
-                # delete renamed or unused k
-                del state_dict[k]
-
-            args.start_epoch = 0
-            msg = model.load_state_dict(state_dict, strict=False)
-            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
-
-            print("=> loaded pre-trained model '{}'".format(args.pretrained))
-        else:
-            print("=> no checkpoint found at '{}'".format(args.pretrained))
-
-    # infer learning rate before changing batch size
-    init_lr = args.lr * args.batch_size / 256
-
-    if args.distributed:
-        # For multiprocessing distributed, DistributedDataParallel constructor
-        # should always set the single device scope, otherwise,
-        # DistributedDataParallel will use all available devices.
-        if args.gpu is not None:
-            torch.cuda.set_device(args.gpu)
-            model.cuda(args.gpu)
-            # When using a single GPU per process and per
-            # DistributedDataParallel, we need to divide the batch size
-            # ourselves based on the total number of GPUs we have
-            args.batch_size = int(args.batch_size / ngpus_per_node)
-            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
-            model = torch.nn.parallel.DistributedDataParallel(
-                model, device_ids=[args.gpu]
-            )
-        else:
-            model.cuda()
-            # DistributedDataParallel will divide and allocate batch_size to all
-            # available GPUs if device_ids are not set
-            model = torch.nn.parallel.DistributedDataParallel(model)
-    elif args.gpu is not None:
-        torch.cuda.set_device(args.gpu)
-        model = model.cuda(args.gpu)
-    else:
-        # DataParallel will divide and allocate batch_size to all available GPUs
-        if args.arch.startswith("alexnet") or args.arch.startswith("vgg"):
-            model.features = torch.nn.DataParallel(model.features)
-            model.cuda()
-        else:
-            model = torch.nn.DataParallel(model).cuda()
-
-    # define loss function (criterion) and optimizer
-    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
-
-    # optimize only the linear classifier
-    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
-    assert len(parameters) == 2  # fc.weight, fc.bias
-
-    optimizer = torch.optim.SGD(
-        parameters, init_lr, momentum=args.momentum, weight_decay=args.weight_decay
-    )
-    if args.lars:
-        print("=> use LARS optimizer.", flush=True)
-        from LARC import LARC
-
-        optimizer = LARC(optimizer=optimizer, trust_coefficient=0.001, clip=False)
-
-    # optionally resume from a checkpoint
-    if args.resume:
-        if os.path.isfile(args.resume):
-            print("=> loading checkpoint '{}'".format(args.resume), flush=True)
-            if args.gpu is None:
-                checkpoint = torch.load(args.resume)
-            else:
-                # Map model to be loaded to specified single gpu.
-                loc = "cuda:{}".format(args.gpu)
-                checkpoint = torch.load(args.resume, map_location=loc)
-            args.start_epoch = checkpoint["epoch"]
-            best_acc1 = checkpoint["best_acc1"]
-            if args.gpu is not None:
-                # best_acc1 may be from a checkpoint from a different GPU
-                best_acc1 = best_acc1.to(args.gpu)
-            model.load_state_dict(checkpoint["state_dict"])
-            optimizer.load_state_dict(checkpoint["optimizer"])
-            print(
-                "=> loaded checkpoint '{}' (epoch {})".format(
-                    args.resume, checkpoint["epoch"]
-                ),
-                flush=True,
-            )
-        else:
-            print("=> no checkpoint found at '{}'".format(args.resume))
-
-    cudnn.benchmark = True
-
-    # Data loading code
-    traindir = os.path.join(args.data, "train")
-    valdir = os.path.join(args.data, "val")
-    normalize = transforms.Normalize(
-        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-    )
-
-    if args.dataset_name == "imagenet":
-        train_dataset = datasets.ImageFolder(
-            traindir,
-            transforms.Compose(
-                [
-                    transforms.RandomResizedCrop(224),
-                    transforms.RandomHorizontalFlip(),
-                    transforms.ToTensor(),
-                    normalize,
-                ]
-            ),
-        )
-        val_dataset = datasets.ImageFolder(
-            valdir,
-            transforms.Compose(
-                [
-                    transforms.Resize(256),
-                    transforms.CenterCrop(224),
-                    transforms.ToTensor(),
-                    normalize,
-                ]
-            ),
-        )
-    elif args.dataset_name == "food101":
-        print("=> using food101 dataset.", flush=True)
-        train_dataset = datasets.Food101(
-            root=args.data,
-            split="train",
-            transform=transforms.Compose(
-                [
-                    transforms.RandomResizedCrop(224),
-                    transforms.RandomHorizontalFlip(),
-                    transforms.ToTensor(),
-                    normalize,
-                ],
-            ),
-        )
-        val_dataset = datasets.Food101(
-            root=args.data,
-            split="test",
-            transform=transforms.Compose(
-                [
-                    transforms.Resize(256),
-                    transforms.CenterCrop(224),
-                    transforms.ToTensor(),
-                    normalize,
-                ],
-            ),
-        )
-    elif args.dataset_name == "cifar10":
-        train_dataset = datasets.CIFAR10(
-            root=args.data,
-            train=True,
-            download=True,
-            transform=transforms.Compose(
-                [
-                    transforms.RandomResizedCrop(224),
-                    transforms.RandomHorizontalFlip(),
-                    transforms.ToTensor(),
-                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                ],
-            ),
-        )
-        val_dataset = datasets.CIFAR10(
-            root=args.data,
-            train=False,
-            download=True,
-            transform=transforms.Compose(
-                [
-                    transforms.Resize(256),
-                    transforms.CenterCrop(224),
-                    transforms.ToTensor(),
-                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                ],
-            ),
-        )
-    elif args.dataset_name == "cifar100":
-        train_dataset = datasets.CIFAR100(
-            root=args.data,
-            train=True,
-            transform=transforms.Compose(
-                [
-                    transforms.RandomResizedCrop(224),
-                    transforms.RandomHorizontalFlip(),
-                    transforms.ToTensor(),
-                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                ],
-            ),
-        )
-        val_dataset = datasets.CIFAR100(
-            root=args.data,
-            train=False,
-            transform=transforms.Compose(
-                [
-                    transforms.Resize(256),
-                    transforms.CenterCrop(224),
-                    transforms.ToTensor(),
-                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
-                ],
-            ),
-        )
-    elif args.dataset_name == "places365":
-        train_dataset = datasets.Places365(
-            root=args.data,
-            split="train-standard",
-            transform=transforms.Compose(
-                [
-                    transforms.RandomResizedCrop(224),
-                    transforms.RandomHorizontalFlip(),
-                    transforms.ToTensor(),
-                    normalize,
-                ],
-            ),
-        )
-        val_dataset = datasets.Places365(
-            root=args.data,
-            split="val",
-            transform=transforms.Compose(
-                [
-                    transforms.Resize(256),
-                    transforms.CenterCrop(224),
-                    transforms.ToTensor(),
-                    normalize,
-                ],
-            ),
-        )
-    elif args.dataset_name == "INaturalist":
-        train_dataset = INAT(
-            root=args.data,
-            ann_file=os.path.join(args.data, "train2018.json"),
-            transform=transforms.Compose(
-                [
-                    transforms.RandomResizedCrop(224),
-                    transforms.RandomHorizontalFlip(),
-                    transforms.ToTensor(),
-                    normalize,
-                ],
-            ),
-        )
-        val_dataset = INAT(
-            root=args.data,
-            ann_file=os.path.join(args.data, "val2018.json"),
-            transform=transforms.Compose(
-                [
-                    transforms.Resize(256),
-                    transforms.CenterCrop(224),
-                    transforms.ToTensor(),
-                    normalize,
-                ],
-            ),
-        )
-
-    if args.distributed:
-        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
-    else:
-        train_sampler = None
-
-    train_loader = torch.utils.data.DataLoader(
-        train_dataset,
-        batch_size=args.batch_size,
-        shuffle=(train_sampler is None),
-        num_workers=args.workers,
-        pin_memory=True,
-        sampler=train_sampler,
-    )
-
-    val_loader = torch.utils.data.DataLoader(
-        val_dataset,
-        batch_size=256,
-        shuffle=False,
-        num_workers=args.workers,
-        pin_memory=True,
-    )
-
-    if args.evaluate:
-        validate(val_loader, model, criterion, args)
-        return
-
-    for epoch in range(args.start_epoch, args.epochs):
-        if args.distributed:
-            train_sampler.set_epoch(epoch)
-        adjust_learning_rate(optimizer, init_lr, epoch, args)
-
-        # train for one epoch
-        train(train_loader, model, criterion, optimizer, epoch, args)
-
-        # evaluate on validation set
-        acc1 = validate(val_loader, model, criterion, args)
-
-        # remember best acc@1 and save checkpoint
-        is_best = acc1 > best_acc1
-        best_acc1 = max(acc1, best_acc1)
-
-        if not args.multiprocessing_distributed or (
-            args.multiprocessing_distributed and args.rank % ngpus_per_node == 0
-        ):
-            checkpoint_name = "checkpoint_{:04d}.pth.tar".format(epoch + 1)
-            checkpoint_file = os.path.join(args.checkpoint_dir, checkpoint_name)
-            save_checkpoint(
-                {
-                    "epoch": epoch + 1,
-                    "arch": args.arch,
-                    "state_dict": model.state_dict(),
-                    "best_acc1": best_acc1,
-                    "optimizer": optimizer.state_dict(),
-                },
-                is_best,
-                filename=checkpoint_file,
-            )
-            if epoch == args.start_epoch:
-                sanity_check(model.state_dict(), args.pretrained)
-
-
-def train(train_loader, model, criterion, optimizer, epoch, args):
-    batch_time = AverageMeter("Time", ":6.3f")
-    data_time = AverageMeter("Data", ":6.3f")
-    losses = AverageMeter("Loss", ":.4e")
-    top1 = AverageMeter("Acc@1", ":6.2f")
-    top5 = AverageMeter("Acc@5", ":6.2f")
-    progress = ProgressMeter(
-        len(train_loader),
-        [batch_time, data_time, losses, top1, top5],
-        prefix="Epoch: [{}]".format(epoch),
-    )
-
-    """
-    Switch to eval mode:
-    Under the protocol of linear classification on frozen features/models,
-    it is not legitimate to change any part of the pre-trained model.
-    BatchNorm in train mode may revise running mean/std (even if it receives
-    no gradient), which are part of the model parameters too.
-    """
-    model.eval()
-
-    end = time.time()
-    i = 0
-    for images, target in tqdm(train_loader):
-        # measure data loading time
-        data_time.update(time.time() - end)
-
-        if args.gpu is not None:
-            images = images.cuda(args.gpu, non_blocking=True)
-        target = target.cuda(args.gpu, non_blocking=True)
-
-        # compute output
-        output = model(images)
-        loss = criterion(output, target)
-
-        # measure accuracy and record loss
-        acc1, acc5 = accuracy(output, target, topk=(1, 5))
-        losses.update(loss.item(), images.size(0))
-        top1.update(acc1[0], images.size(0))
-        top5.update(acc5[0], images.size(0))
-
-        # compute gradient and do SGD step
-        optimizer.zero_grad()
-        loss.backward()
-        optimizer.step()
-
-        # measure elapsed time
-        batch_time.update(time.time() - end)
-        end = time.time()
-
-        if i % args.print_freq == 0:
-            progress.display(i)
-
-        i += 1
-
-
-def validate(val_loader, model, criterion, args):
-    batch_time = AverageMeter("Time", ":6.3f")
-    losses = AverageMeter("Loss", ":.4e")
-    top1 = AverageMeter("Acc@1", ":6.2f")
-    top5 = AverageMeter("Acc@5", ":6.2f")
-    progress = ProgressMeter(
-        len(val_loader), [batch_time, losses, top1, top5], prefix="Test: "
-    )
-
-    # switch to evaluate mode
-    model.eval()
-
-    with torch.no_grad():
-        end = time.time()
-        i = 0
-        for images, target in tqdm(val_loader):
-            if args.gpu is not None:
-                images = images.cuda(args.gpu, non_blocking=True)
-            target = target.cuda(args.gpu, non_blocking=True)
-
-            # compute output
-            output = model(images)
-            loss = criterion(output, target)
-
-            # measure accuracy and record loss
-            acc1, acc5 = accuracy(output, target, topk=(1, 5))
-            losses.update(loss.item(), images.size(0))
-            top1.update(acc1[0], images.size(0))
-            top5.update(acc5[0], images.size(0))
-
-            # measure elapsed time
-            batch_time.update(time.time() - end)
-            end = time.time()
-
-            if i % args.print_freq == 0:
-                progress.display(i)
-
-            i += 1
-
-        # # TODO: this should also be done with the ProgressMeter
-        print(
-            "\n * Accuracy@1 {top1.avg:.3f} Accuracy@5 {top5.avg:.3f}".format(
-                top1=top1, top5=top5
-            )
-        )
-
-    return top1.avg
-
-
-def save_checkpoint(state, is_best, filename="checkpoint.pth.tar"):
-    torch.save(state, filename)
-    if is_best:
-        shutil.copyfile(filename, "model_best.pth.tar")
-
-
-def sanity_check(state_dict, pretrained_weights):
-    """
-    Linear classifier should not change any weights other than the linear layer.
-    This sanity check asserts nothing wrong happens (e.g., BN stats updated).
-    """
-    print("=> loading '{}' for sanity check".format(pretrained_weights))
-    checkpoint = torch.load(pretrained_weights, map_location="cpu")
-
-    state_dict_pre = checkpoint["state_dict"]
-
-    for k in list(state_dict.keys()):
-        # only ignore fc layer
-        if "fc.weight" in k or "fc.bias" in k:
-            continue
-
-        # name in pretrained model
-        k_pre = (
-            "module.encoder." + k[len("module.") :]
-            if k.startswith("module.")
-            else "module.encoder." + k
-        )
-
-        assert (
-            state_dict[k].cpu() == state_dict_pre[k_pre]
-        ).all(), "{} is changed in linear classifier training.".format(k)
-
-    print("=> sanity check passed.")
-
-
-class AverageMeter(object):
-    """Computes and stores the average and current value"""
-
-    def __init__(self, name, fmt=":f"):
-        self.name = name
-        self.fmt = fmt
-        self.reset()
-
-    def reset(self):
-        self.val = 0
-        self.avg = 0
-        self.sum = 0
-        self.count = 0
-
-    def update(self, val, n=1):
-        self.val = val
-        self.sum += val * n
-        self.count += n
-        self.avg = self.sum / self.count
-
-    def __str__(self):
-        fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
-        return fmtstr.format(**self.__dict__)
-
-
-class ProgressMeter(object):
-    def __init__(self, num_batches, meters, prefix=""):
-        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
-        self.meters = meters
-        self.prefix = prefix
-
-    def display(self, batch):
-        entries = [self.prefix + self.batch_fmtstr.format(batch)]
-        entries += [str(meter) for meter in self.meters]
-        print("\t".join(entries), flush=True)
-
-    def _get_batch_fmtstr(self, num_batches):
-        num_digits = len(str(num_batches // 1))
-        fmt = "{:" + str(num_digits) + "d}"
-        return "[" + fmt + "/" + fmt.format(num_batches) + "]"
-
-
-def adjust_learning_rate(optimizer, init_lr, epoch, args):
-    """Decay the learning rate based on schedule"""
-    cur_lr = init_lr * 0.5 * (1.0 + math.cos(math.pi * epoch / args.epochs))
-    for param_group in optimizer.param_groups:
-        param_group["lr"] = cur_lr
-
-
-def accuracy(output, target, topk=(1,)):
-    """Computes the accuracy over the k top predictions for the specified values of k"""
-    with torch.no_grad():
-        maxk = max(topk)
-        batch_size = target.size(0)
-
-        _, pred = output.topk(maxk, 1, True, True)
-        pred = pred.t()
-        correct = pred.eq(target.view(1, -1).expand_as(pred))
-
-        res = []
-        for k in topk:
-            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
-            res.append(correct_k.mul_(100.0 / batch_size))
-        return res
-
-
-if __name__ == "__main__":
-    main()
diff --git a/simsiam/loader.py b/simsiam/loader.py
deleted file mode 100644
index 5b53049..0000000
--- a/simsiam/loader.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import os
-import random
-
-import torch
-from PIL import Image, ImageFilter
-from torchvision import datasets, transforms
-
-
-class GaussianBlur(object):
-    """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709."""
-
-    def __init__(self, sigma=[0.1, 2.0]):
-        self.sigma = sigma
-
-    def __call__(self, x):
-        sigma = random.uniform(self.sigma[0], self.sigma[1])
-        x = x.filter(ImageFilter.GaussianBlur(radius=sigma))
-        return x
-
-
-_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-
-# MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
-_real_augmentations = [
-    transforms.RandomResizedCrop(224, scale=(0.2, 1.0)),
-    transforms.RandomApply(
-        [transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)],  # not strengthened
-        p=0.8,
-    ),
-    transforms.RandomGrayscale(p=0.2),
-    transforms.RandomApply([GaussianBlur([0.1, 2.0])], p=0.5),
-    transforms.RandomHorizontalFlip(),
-    transforms.ToTensor(),
-    _normalize,
-]
-
-
-class TwoCropsTransform:
-    """Take two random crops of one image as the query and key."""
-
-    def __init__(self):
-        self.base_transform = transforms.Compose(_real_augmentations)
-
-    def __call__(self, x):
-        q = self.base_transform(x)
-        k = self.base_transform(x)
-        return [q, k]
-
-
-class ImageNetSynthetic(datasets.ImageNet):
-    def __init__(
-        self,
-        imagenet_root,
-        imagenet_synthetic_root,
-        index_min=0,
-        index_max=9,
-        generative_augmentation_prob=None,
-        load_one_real_image=False,
-        split="train",
-    ):
-        super(ImageNetSynthetic, self).__init__(
-            root=imagenet_root,
-            split=split,
-        )
-        self.imagenet_root = imagenet_root
-        self.imagenet_synthetic_root = imagenet_synthetic_root
-        self.index_min = index_min
-        self.index_max = index_max
-        self.generative_augmentation_prob = generative_augmentation_prob
-        self.load_one_real_image = load_one_real_image
-        self.real_transforms = transforms.Compose(_real_augmentations)
-        # Remove random crop for synthetic image augmentation.
-        self.synthetic_transforms = transforms.Compose(_real_augmentations[1:])
-        self.split = split
-
-    def __getitem__(self, index):
-        imagenet_filename, label = self.imgs[index]
-
-        def _synthetic_image(filename):
-            rand_int = random.randint(self.index_min, self.index_max)
-            filename_and_extension = filename.split("/")[-1]
-            filename_parent_dir = filename.split("/")[-2]
-            image_path = os.path.join(
-                self.imagenet_synthetic_root,
-                self.split,
-                filename_parent_dir,
-                filename_and_extension.split(".")[0] + f"_{rand_int}.JPEG",
-            )
-            return Image.open(image_path).convert("RGB")
-
-        if self.generative_augmentation_prob is not None:
-            if torch.rand(1) < self.generative_augmentation_prob:
-                # Generate a synthetic image.
-                image1 = _synthetic_image(imagenet_filename)
-                image1 = self.synthetic_transforms(image1)
-            else:
-                image1 = self.loader(os.path.join(self.root, imagenet_filename))
-                image1 = self.real_transforms(image1)
-
-            if torch.rand(1) < self.generative_augmentation_prob:
-                # Generate another synthetic image.
-                image2 = _synthetic_image(imagenet_filename)
-                image2 = self.synthetic_transforms(image2)
-            else:
-                image2 = self.loader(os.path.join(self.root, imagenet_filename))
-                image2 = self.real_transforms(image2)
-        else:
-            if self.load_one_real_image:
-                image1 = self.loader(os.path.join(self.root, imagenet_filename))
-                image1 = self.real_transforms(image1)
-            else:
-                image1 = _synthetic_image(imagenet_filename)
-                image1 = self.synthetic_transforms(image1)
-            # image2 is always synthetic.
-            image2 = _synthetic_image(imagenet_filename)
-            image2 = self.synthetic_transforms(image2)
-
-        return [image1, image2], label
diff --git a/simsiam/temp.py b/simsiam/temp.py
deleted file mode 100644
index e69de29..0000000
diff --git a/simsiam/train_simsiam.py b/simsiam/train_simsiam.py
deleted file mode 100644
index 4896c2e..0000000
--- a/simsiam/train_simsiam.py
+++ /dev/null
@@ -1,438 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-import argparse
-import math
-import os
-import random
-from datetime import datetime
-from functools import partial
-
-import torch
-import torch.nn.parallel
-import torch.optim
-import torch.utils.data
-import torch.utils.data.distributed
-from torch import distributed as dist
-from torch import nn
-from torch.backends import cudnn
-from torch.nn.parallel import DistributedDataParallel as DDP  # noqa: N817
-from torch.utils.data.distributed import DistributedSampler
-from torchvision import datasets, models
-from tqdm import tqdm
-
-from simsiam import distributed as dist_utils
-from simsiam import builder, loader
-
-
-model_names = sorted(
-    name
-    for name in models.__dict__
-    if name.islower() and not name.startswith("__") and callable(models.__dict__[name])
-)
-
-parser = argparse.ArgumentParser(description="PyTorch ImageNet Training")
-parser.add_argument(
-    "--data_dir",
-    metavar="DIR",
-    default="/scratch/ssd004/datasets/imagenet256",
-    help="path to dataset.",
-)
-parser.add_argument(
-    "-a",
-    "--arch",
-    metavar="ARCH",
-    default="resnet50",
-    choices=model_names,
-    help="model architecture: " + " | ".join(model_names) + " (default: resnet50)",
-)
-parser.add_argument(
-    "-j",
-    "--num_workers",
-    default=4,
-    type=int,
-    metavar="N",
-    help="number of data loading workers (default: 32)",
-)
-parser.add_argument(
-    "--epochs", default=100, type=int, metavar="N", help="number of total epochs to run"
-)
-parser.add_argument(
-    "-b",
-    "--batch-size",
-    default=256,
-    type=int,
-    metavar="N",
-    help="mini-batch size (default: 512), this is the total "
-    "batch size of all GPUs on the current node when "
-    "using Data Parallel or Distributed Data Parallel",
-)
-parser.add_argument(
-    "--lr",
-    "--learning-rate",
-    default=0.05,
-    type=float,
-    metavar="LR",
-    help="initial (base) learning rate",
-    dest="lr",
-)
-parser.add_argument(
-    "--momentum", default=0.9, type=float, metavar="M", help="momentum of SGD solver"
-)
-parser.add_argument(
-    "--wd",
-    "--weight-decay",
-    default=1e-4,
-    type=float,
-    metavar="W",
-    help="weight decay (default: 1e-4)",
-    dest="weight_decay",
-)
-parser.add_argument(
-    "--resume_from_checkpoint",
-    default="",
-    type=str,
-    help="Path to latest checkpoint.",
-)
-parser.add_argument(
-    "--seed", default=42, type=int, help="seed for initializing training. "
-)
-
-# simsiam specific configs:
-parser.add_argument(
-    "--dim", default=2048, type=int, help="feature dimension (default: 2048)"
-)
-parser.add_argument(
-    "--pred-dim",
-    default=512,
-    type=int,
-    help="hidden dimension of the predictor (default: 512)",
-)
-parser.add_argument(
-    "--fix-pred-lr", action="store_true", help="Fix learning rate for the predictor"
-)
-
-parser.add_argument(
-    "--distributed_mode",
-    action="store_true",
-    help="Enable distributed training",
-)
-parser.add_argument("--distributed_launcher", default="slurm")
-parser.add_argument("--distributed_backend", default="nccl")
-parser.add_argument(
-    "--checkpoint_dir",
-    default="/projects/imagenet_synthetic/model_checkpoints",
-    help="Checkpoint root directory.",
-)
-parser.add_argument(
-    "--experiment",
-    default="",
-    help="Experiment name.",
-)
-parser.add_argument(
-    "--use_synthetic_data",
-    action=argparse.BooleanOptionalAction,
-    help="Whether to use real data or synthetic data for training.",
-)
-parser.add_argument(
-    "--synthetic_data_dir",
-    default="/projects/imagenet_synthetic/",
-    help="Path to the root of synthetic data.",
-)
-parser.add_argument(
-    "--synthetic_index_min",
-    default=0,
-    type=int,
-    help="Synthetic data files are named filename_i.JPEG. This index determines the lower bound for i.",
-)
-parser.add_argument(
-    "--synthetic_index_max",
-    default=9,
-    type=int,
-    help="Synthetic data files are named filename_i.JPEG. This index determines the upper bound for i.",
-)
-parser.add_argument(
-    "--generative_augmentation_prob",
-    default=None,
-    type=float,
-    help="The probability of applying a generative model augmentation to a view. Applies to the views separately.",
-)
-parser.add_argument(
-    "-p",
-    "--print-freq",
-    default=10,
-    type=int,
-    metavar="N",
-    help="print frequency (default: 10)",
-)
-
-
-def worker_init_fn(worker_id: int, num_workers: int, rank: int, seed: int) -> None:
-    """Initialize worker processes with a random seed.
-
-    Parameters
-    ----------
-    worker_id : int
-        ID of the worker process.
-    num_workers : int
-        Total number of workers that will be initialized.
-    rank : int
-        The rank of the current process.
-    seed : int
-        A random seed used determine the worker seed.
-    """
-    worker_seed = num_workers * rank + worker_id + seed
-    torch.manual_seed(worker_seed)
-    random.seed(worker_seed)
-
-
-def setup() -> None:
-    """Initialize the process group."""
-    dist.init_process_group("nccl")
-
-
-def cleanup() -> None:
-    """Clean up the process group after training."""
-    dist.destroy_process_group()
-
-
-def main():
-    args = parser.parse_args()
-    current_time = datetime.now().strftime("%Y-%m-%d-%H-%M")
-    checkpoint_subdir = (
-        f"{args.experiment}_{current_time}" if args.experiment else f"{current_time}"
-    )
-    args.checkpoint_dir = os.path.join(args.checkpoint_dir, checkpoint_subdir)
-    os.makedirs(args.checkpoint_dir, exist_ok=True)
-
-    print(args)
-
-    # torch.multiprocessing.set_start_method("spawn")
-    # torch.multiprocessing.set_start_method("spawn")
-    if args.distributed_mode:
-        # dist_utils.init_distributed_mode(
-        #     launcher=args.distributed_launcher,
-        #     backend=args.distributed_backend,
-        # )
-        setup()
-        torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
-        torch.cuda.empty_cache()
-        device_id = torch.cuda.current_device()
-    else:
-        device_id = None
-
-    # Data loading.
-    if args.use_synthetic_data:
-        print(
-            f"Using synthetic data for training at {args.synthetic_data_dir} between indices {args.synthetic_index_min} and {args.synthetic_index_max}."
-        )
-        train_dataset = loader.ImageNetSynthetic(
-            args.data_dir,
-            args.synthetic_data_dir,
-            index_min=args.synthetic_index_min,
-            index_max=args.synthetic_index_max,
-            generative_augmentation_prob=args.generative_augmentation_prob,
-        )
-    else:
-        print(f"Using real data for training at {args.data_dir}.")
-        train_data_dir = os.path.join(args.data_dir, "train")
-        train_dataset = datasets.ImageFolder(train_data_dir, loader.TwoCropsTransform())
-
-    train_sampler = None
-    if dist_utils.is_dist_avail_and_initialized() and args.distributed_mode:
-        train_sampler = DistributedSampler(
-            train_dataset,
-            seed=args.seed,
-            drop_last=True,
-        )
-    init_fn = partial(
-        worker_init_fn,
-        num_workers=args.num_workers,
-        rank=dist_utils.get_rank(),
-        seed=args.seed,
-    )
-
-    train_loader = torch.utils.data.DataLoader(
-        train_dataset,
-        batch_size=args.batch_size,
-        shuffle=(train_sampler is None),
-        sampler=train_sampler,
-        num_workers=args.num_workers,
-        worker_init_fn=init_fn,
-        pin_memory=False,
-        drop_last=True,
-    )
-    if dist_utils.get_rank() == 0:
-        print(f"Creating model {args.arch}")
-    model = builder.SimSiam(models.__dict__[args.arch], args.dim, args.pred_dim)
-
-    if args.distributed_mode and dist_utils.is_dist_avail_and_initialized():
-        # Apply SyncBN
-        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
-        # set the single device scope, otherwise DistributedDataParallel will
-        # use all available devices
-        # torch.cuda.set_device(device_id)
-        model = model.cuda(device_id)
-        model = DDP(model, device_ids=[device_id])
-    else:
-        raise NotImplementedError("Only DistributedDataParallel is supported.")
-    if dist_utils.get_rank() == 0:
-        print(model)  # print model after SyncBatchNorm
-
-    # define loss function (criterion) and optimizer
-    criterion = nn.CosineSimilarity(dim=1).cuda(device_id)
-
-    if args.fix_pred_lr:
-        optim_params = [
-            {"params": model.module.encoder.parameters(), "fix_lr": False},
-            {"params": model.module.predictor.parameters(), "fix_lr": True},
-        ]
-    else:
-        optim_params = model.parameters()
-
-    # infer learning rate before changing batch size
-    # init_lr = args.lr * args.batch_size / 256.0
-    # TODO Hard-code init-lr to match the original paper with bs=512.
-    init_lr = args.lr * 2.0
-
-    optimizer = torch.optim.SGD(
-        optim_params,
-        init_lr,
-        momentum=args.momentum,
-        weight_decay=args.weight_decay,
-    )
-
-    start_epoch = 0
-    # Optionally resume from a checkpoint
-    if args.resume_from_checkpoint:
-        if os.path.isfile(args.resume_from_checkpoint):
-            print(f"Loading checkpoint: {args.resume_from_checkpoint}")
-            checkpoint = torch.load(args.resume_from_checkpoint)
-            start_epoch = checkpoint["epoch"] + 1
-            model.load_state_dict(checkpoint["state_dict"])
-            optimizer.load_state_dict(checkpoint["optimizer"])
-            print(f"Loaded checkpoint {args.resume_from_checkpoint} successfully.")
-        else:
-            raise ValueError(f"No checkpoint found at: {args.resume_from_checkpoint}")
-
-    cudnn.benchmark = True
-
-    for epoch in range(start_epoch, args.epochs):
-        print(f"Starting training epoch: {epoch}")
-        if dist_utils.is_dist_avail_and_initialized():
-            train_sampler.set_epoch(epoch)
-        adjust_learning_rate(optimizer, init_lr, epoch, args)
-
-        # train for one epoch
-        train(train_loader, model, criterion, optimizer, epoch, device_id, args)
-
-        # Checkpointing.
-        if dist_utils.get_rank() == 0:
-            checkpoint_name = "checkpoint_{:04d}.pth.tar".format(epoch)
-            checkpoint_file = os.path.join(args.checkpoint_dir, checkpoint_name)
-            save_checkpoint(
-                {
-                    "epoch": epoch,
-                    "arch": args.arch,
-                    "state_dict": model.state_dict(),
-                    "optimizer": optimizer.state_dict(),
-                },
-                filename=checkpoint_file,
-            )
-
-
-def train(train_loader, model, criterion, optimizer, epoch, device_id, args):
-    """Single epoch training code."""
-    losses = AverageMeter("Loss", ":.4f")
-    progress = ProgressMeter(
-        len(train_loader),
-        [losses],
-        prefix="Epoch: [{}]".format(epoch),
-    )
-
-    # switch to train mode
-    model.train()
-
-    for i, (images, _) in enumerate(train_loader):
-        # for images, _ in tqdm(train_loader):
-        images[0] = images[0].cuda(device_id, non_blocking=True)
-        images[1] = images[1].cuda(device_id, non_blocking=True)
-
-        # compute output and loss
-        p1, p2, z1, z2 = model(x1=images[0], x2=images[1])
-        loss = -(criterion(p1, z2).mean() + criterion(p2, z1).mean()) * 0.5
-
-        losses.update(loss.item(), images[0].size(0))
-
-        # compute gradient and do SGD step
-        optimizer.zero_grad()
-        loss.backward()
-        optimizer.step()
-
-        if i % args.print_freq == 0:
-            progress.display(i)
-
-
-def save_checkpoint(state, filename="checkpoint.pth.tar"):
-    """Save state dictionary into a model checkpoint."""
-    print(f"Saving checkpoint at: {filename}")
-    torch.save(state, filename)
-
-
-class AverageMeter(object):
-    """Computes and stores the average and current value"""
-
-    def __init__(self, name, fmt=":f"):
-        self.name = name
-        self.fmt = fmt
-        self.reset()
-
-    def reset(self):
-        self.val = 0
-        self.avg = 0
-        self.sum = 0
-        self.count = 0
-
-    def update(self, val, n=1):
-        self.val = val
-        self.sum += val * n
-        self.count += n
-        self.avg = self.sum / self.count
-
-    def __str__(self):
-        fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
-        return fmtstr.format(**self.__dict__)
-
-
-class ProgressMeter(object):
-    def __init__(self, num_batches, meters, prefix=""):
-        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
-        self.meters = meters
-        self.prefix = prefix
-
-    def display(self, batch):
-        entries = [self.prefix + self.batch_fmtstr.format(batch)]
-        entries += [str(meter) for meter in self.meters]
-        print("\t".join(entries))
-
-    def _get_batch_fmtstr(self, num_batches):
-        num_digits = len(str(num_batches // 1))
-        fmt = "{:" + str(num_digits) + "d}"
-        return "[" + fmt + "/" + fmt.format(num_batches) + "]"
-
-
-def adjust_learning_rate(optimizer, init_lr, epoch, args):
-    """Decay the learning rate based on schedule."""
-    cur_lr = init_lr * 0.5 * (1.0 + math.cos(math.pi * epoch / args.epochs))
-    for param_group in optimizer.param_groups:
-        if "fix_lr" in param_group and param_group["fix_lr"]:
-            param_group["lr"] = init_lr
-        else:
-            param_group["lr"] = cur_lr
-
-
-if __name__ == "__main__":
-    main()