-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.training
More file actions
61 lines (48 loc) · 1.54 KB
/
Dockerfile.training
File metadata and controls
61 lines (48 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Training image for CHGNet, based on the CUDA 12.1.1 development image (Ubuntu 22.04).
# Build: this file is named Dockerfile.training, so it has to be selected with -f.
#   Run from the directory that contains scripts/, src/ and requirements.txt
#   (adjust the -f path if this file lives elsewhere):
#   docker build -f Dockerfile.training -t gcr.io/cathode-screening/chgnet-training:latest .
# Push: docker push gcr.io/cathode-screening/chgnet-training:latest
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
# Avoid interactive apt/debconf prompts while the image is being built.
# Declared as ARG rather than ENV: the value is visible to the RUN steps below
# but is not persisted into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
# System packages: Python 3.10 with pip and venv support, plus git and the
# curl/wget download tools. The apt package lists are deleted in the same layer
# so they do not add to the image size.
RUN apt-get update \
    && apt-get install --yes \
        curl \
        git \
        python3-pip \
        python3.10 \
        python3.10-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Make the bare `python` command resolve to the Python 3.10 interpreter,
# replacing any existing /usr/bin/python entry.
RUN ln --symbolic --force /usr/bin/python3.10 /usr/bin/python
# Upgrade pip. --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip
# Install PyTorch built against CUDA 12.1 from the PyTorch wheel index.
# --no-cache-dir matters here: the wheels are large and a cached copy would
# otherwise be stored in the layer alongside the installed packages.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu121 \
        torch \
        torchaudio \
        torchvision
# Install CHGNet and its companion libraries.
# Every requirement is quoted: RUN executes through the shell, and an unquoted
# `pkg>=1.0` is parsed as `pkg` with stdout redirected to a file named `=1.0`,
# which drops the version constraint and hides pip's output.
RUN pip install --no-cache-dir \
        "chgnet>=0.3.0" \
        "mp-api>=0.37" \
        "numpy>=1.24" \
        "pandas>=2.0" \
        "pymatgen>=2024.1.1" \
        "python-dotenv>=1.0" \
        "scikit-learn>=1.3" \
        "tqdm>=4.65"
# Additional training utilities (experiment logging and columnar data I/O).
# Requirements are quoted so the shell does not treat `>=...` as an output
# redirection, which would discard the minimum-version constraints.
RUN pip install --no-cache-dir \
        "pyarrow>=14.0" \
        "tensorboard>=2.14" \
        "wandb>=0.16"
# Create the working directory; relative paths in later instructions resolve against it.
WORKDIR /app
# Copy the dependency manifest on its own and install from it before the source
# tree is copied, so edits under scripts/ or src/ do not invalidate this layer.
COPY requirements.txt /app/
# Install any remaining requirements. This step stays best-effort (a failure does
# not abort the build), but pip's error output is left visible and a warning is
# written to stderr so a failed install can be seen in the build log.
RUN pip install --no-cache-dir -r requirements.txt \
    || echo "WARNING: pip install -r requirements.txt failed; continuing without it" >&2
# Copy training scripts and project source
COPY scripts/ /app/scripts/
COPY src/ /app/src/
# Runtime environment:
#   PYTHONUNBUFFERED=1      Python writes stdout/stderr without buffering
#   CUDA_VISIBLE_DEVICES=0  only GPU index 0 is exposed to CUDA programs
ENV PYTHONUNBUFFERED=1 \
    CUDA_VISIBLE_DEVICES=0
# Default command, executed through `bash -c` from the working directory:
#   1. scripts/download_data.py                     download data
#   2. scripts/36_train_gcp_l4.py --phase finetune  FINE-TUNE ONLY (skips 12GB pretrain data)
#   3. scripts/upload_results.py                    upload results
# The steps are chained with `&&`, so each one runs only if the previous step
# exited successfully.
CMD ["bash", "-c", "python scripts/download_data.py && python scripts/36_train_gcp_l4.py --phase finetune && python scripts/upload_results.py"]