Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions statbot-helm/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ apiVersion: v2
name: statbot
description: A Helm chart for deploying the Statbot API and UI
type: application
version: 0.0.2
appVersion: 0.0.2
version: 0.0.3
appVersion: 0.0.3

dependencies:
- name: llm-serving
version: 0.0.5
version: 0.1.3
repository: https://dscc-admin-ch.github.io/helm-charts
alias: vllm
2 changes: 1 addition & 1 deletion statbot-helm/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: ConfigMap
metadata:
name: api-config
data:
MODEL_NAME: "{{ .Values.vllm.deployment.args.model }}"
MODEL_NAME: "{{ .Values.vllm.llm.hfModelName }}"
MODEL_PATH: "{{ .Values.api.env.MODEL_PATH }}"
INFERENCE_SERVER_URL: "https://{{ .Values.vllm.ingress.hostname }}/v1"
DB_HOST: "{{ .Values.api.env.DB_HOST }}"
Expand Down
2 changes: 1 addition & 1 deletion statbot-helm/templates/secrets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ type: Opaque
data:
DB_USERNAME: {{ .Values.api.env.DB_USERNAME | b64enc }}
DB_PASS: {{ .Values.api.env.DB_PASS | b64enc }}
DEPLOYED_LLM_TOKEN: {{ .Values.vllm.deployment.hftoken | b64enc }}
DEPLOYED_LLM_TOKEN: {{ .Values.vllm.huggingFace.hfToken | b64enc }}
57 changes: 34 additions & 23 deletions statbot-helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,43 +25,54 @@ ingress:
hostname: "statbot.lab.sspcloud.fr"

vllm:
global:
suspend: false # Allow scaling the app to 0

nameOverride: ""
fullnameOverride: "llm-serving"
podAnnotations: {}

deployment:
service:
type: ClusterIP
nodePort:
image:
repository: vllm/vllm-openai
pullPolicy: Always
tag: latest
hftoken: ton_token
args:
model: meta-llama/Llama-3.2-3B-Instruct
memoryutilization: 0.8
dtype: half
maxModelLen: 8208
gpu:
number: 1
version: "vllm/vllm-openai:v0.8.2"
pullPolicy: IfNotPresent
custom:
enabled: false
version: ""

service:
resources:
limits:
gpu:
number: 1

huggingFace:
hfToken: ""

llm:
hfModelName: meta-llama/Llama-3.2-1B-Instruct
localPath: "/root/.cache/huggingface"
args: '--gpu-memory-utilization "0.8" --dtype "half" --max-model-len "8200"'

networking:
port:
number: 8000


ingress:
enabled: true
className: "nginx"
annotations:
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
hostname: "llm-serving-statbot.lab.sspcloud.fr"
enabled: true
className: ""
annotations:
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
hostname: "llm-serving-statbot.lab.sspcloud.fr"

s3:
enabled: false # Set to true to use S3
bucket: "your_bucket_name"
modelPath: "path_to_model"
# If not set and create is true, a name is generated using the fullname template
enabled: true # Set to true to use S3
s3ModelPath: ""
accessKeyId: ""
endpoint: ""
defaultRegion: ""
secretAccessKey: ""
sessionToken: ""

nodeSelector: {}