dscc-admin-ch · Jono711 · Jun 25, 2025 · Jun 23, 2025 · Jun 25, 2025
diff --git a/statbot-helm/Chart.yaml b/statbot-helm/Chart.yaml
@@ -2,11 +2,11 @@ apiVersion: v2
 name: statbot
 description: A Helm chart for deploying the Statbot API and UI
 type: application
-version: 0.0.2
-appVersion: 0.0.2
+version: 0.0.3  # version of this chart package
+appVersion: 0.0.3  # version of the Statbot application the chart deploys
 
 dependencies:
   - name: llm-serving
-    version: 0.0.5
+    version: 0.1.3  # subchart release; it is configured under the `vllm` key (see alias)
     repository: https://dscc-admin-ch.github.io/helm-charts
     alias:  vllm
diff --git a/statbot-helm/templates/configmap.yaml b/statbot-helm/templates/configmap.yaml
@@ -3,7 +3,7 @@ kind: ConfigMap
 metadata:
   name: api-config
 data:
-  MODEL_NAME: "{{ .Values.vllm.deployment.args.model }}"
+  MODEL_NAME: "{{ .Values.vllm.llm.hfModelName }}"  # model id read from the vllm subchart values
   MODEL_PATH: "{{ .Values.api.env.MODEL_PATH }}"
   INFERENCE_SERVER_URL: "https://{{ .Values.vllm.ingress.hostname }}/v1"
   DB_HOST: "{{ .Values.api.env.DB_HOST }}"

diff --git a/statbot-helm/templates/secrets.yaml b/statbot-helm/templates/secrets.yaml
@@ -6,4 +6,4 @@ type: Opaque
 data:
   DB_USERNAME: {{ .Values.api.env.DB_USERNAME | b64enc }}
   DB_PASS: {{ .Values.api.env.DB_PASS | b64enc }}
-  DEPLOYED_LLM_TOKEN: {{ .Values.vllm.deployment.hftoken | b64enc }}
+  DEPLOYED_LLM_TOKEN: {{ .Values.vllm.huggingFace.hfToken | b64enc | quote }}  # quoted: an empty hfToken must render "" not null
diff --git a/statbot-helm/values.yaml b/statbot-helm/values.yaml
@@ -25,43 +25,54 @@ ingress:
     hostname: "statbot.lab.sspcloud.fr"
 
 vllm:
+  global:
+    suspend: false  # Allow scaling the app to 0
+
   nameOverride: ""
   fullnameOverride: "llm-serving"
   podAnnotations: {}
 
-  deployment:
+  service:
+    type: ClusterIP
+    nodePort:  # left unset (null)
     image:
-      repository: vllm/vllm-openai
-      pullPolicy: Always
-      tag: latest
-    hftoken: ton_token
-    args:
-      model: meta-llama/Llama-3.2-3B-Instruct
-      memoryutilization: 0.8
-      dtype: half
-      maxModelLen: 8208
-    gpu:
-      number: 1
+      version: "vllm/vllm-openai:v0.8.2"  # full image reference (repository:tag), pinned instead of `latest`
+      pullPolicy: IfNotPresent
+      custom:  # NOTE(review): presumably overrides the image above when enabled - confirm in the llm-serving chart
+        enabled: false
+        version: ""
 
-  service:
+  resources:
+    limits:
+      gpu:
+        number: 1  # GPU count; replaces the former deployment.gpu.number
+
+  huggingFace:
+    hfToken: ""  # base64-encoded into DEPLOYED_LLM_TOKEN by templates/secrets.yaml; supply at install time, never commit
+
+  llm:
+    hfModelName: meta-llama/Llama-3.2-1B-Instruct  # also exported as MODEL_NAME in the api-config ConfigMap
+    localPath: "/root/.cache/huggingface"
+    args: '--gpu-memory-utilization "0.8" --dtype "half" --max-model-len "8200"'  # NOTE(review): max len was 8208 - confirm
+
+  networking:  # the port setting moves here from the former `service` key
     port:
       number: 8000
 
-
   ingress:
-      enabled: true
-      className: "nginx"
-      annotations:
-        nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
-      hostname: "llm-serving-statbot.lab.sspcloud.fr"
+    enabled: true
+    className: ""  # NOTE(review): was "nginx"; the annotation below is nginx-specific - confirm the default class
+    annotations:
+      nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+    hostname: "llm-serving-statbot.lab.sspcloud.fr"  # also used to build INFERENCE_SERVER_URL in the ConfigMap
 
   s3:
-    enabled: false # Set to true to use S3
-    bucket: "your_bucket_name"
-    modelPath: "path_to_model"
-    # If not set and create is true, a name is generated using the fullname template
+    enabled: true  # S3 is on by default; set to false to disable
+    s3ModelPath: ""  # NOTE(review): empty while S3 is enabled - confirm a path is supplied at deploy time
     accessKeyId: ""
     endpoint: ""
     defaultRegion: ""
     secretAccessKey: ""
     sessionToken: ""
+
+  nodeSelector: {}  # empty map: no node selection constraints by default