From a4a96abd3164566acda6ee7a654216020e69605e Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Tue, 17 Dec 2024 14:02:11 +0800 Subject: [PATCH] Fix model-downloader and tgi in multi shard case (#642) * Workaround to acknowledge HF_TOKEN in model-downloader Signed-off-by: Lianhao Lu * tgi: Fix permission issue of non-root user Fix issue #639 Signed-off-by: Lianhao Lu --------- Signed-off-by: Lianhao Lu --- .../common/speecht5/templates/deployment.yaml | 16 +++++++++++---- .../common/tei/templates/deployment.yaml | 16 +++++++++++---- .../teirerank/templates/deployment.yaml | 16 +++++++++++---- .../common/tgi/templates/configmap.yaml | 1 + .../common/tgi/templates/deployment.yaml | 20 +++++++++++++++---- .../common/vllm/templates/deployment.yaml | 14 ++++++++++--- .../common/whisper/templates/deployment.yaml | 14 ++++++++++--- 7 files changed, 75 insertions(+), 22 deletions(-) diff --git a/helm-charts/common/speecht5/templates/deployment.yaml b/helm-charts/common/speecht5/templates/deployment.yaml index 0db70e67e..1a926000a 100644 --- a/helm-charts/common/speecht5/templates/deployment.yaml +++ b/helm-charts/common/speecht5/templates/deployment.yaml @@ -35,8 +35,8 @@ spec: - configMapRef: name: {{ include "speecht5.fullname" . }}-config securityContext: - allowPrivilegeEscalation: false readOnlyRootFilesystem: true + allowPrivilegeEscalation: false {{- if hasKey .Values.securityContext "runAsGroup" }} runAsGroup: {{ .Values.securityContext.runAsGroup }} {{- end }} @@ -51,16 +51,24 @@ spec: seccompProfile: type: RuntimeDefault image: huggingface/downloader:0.17.3 - command: ['sh', '-c'] + command: ['sh', '-ec'] args: - | - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.TTS_MODEL_PATH | quote }}; - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.VOCODER_MODEL| quote }}; + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download models {{ .Values.TTS_MODEL_PATH }} {{ .Values.VOCODER_MODEL }} ... "; + huggingface-cli download --cache-dir /data {{ .Values.TTS_MODEL_PATH | quote }}; + huggingface-cli download --cache-dir /data {{ .Values.VOCODER_MODEL| quote }}; + echo "Change model files mode ..."; chmod -R g+w /data/models--{{ replace "/" "--" .Values.TTS_MODEL_PATH }}; chmod -R g+w /data/models--{{ replace "/" "--" .Values.VOCODER_MODEL }} + # NOTE: Buggy logout command; + # huggingface-cli logout; volumeMounts: - mountPath: /data name: model-volume + - mountPath: /tmp + name: tmp {{- end }} containers: - name: {{ .Release.Name }} diff --git a/helm-charts/common/tei/templates/deployment.yaml b/helm-charts/common/tei/templates/deployment.yaml index 10f13981c..43e7c40da 100644 --- a/helm-charts/common/tei/templates/deployment.yaml +++ b/helm-charts/common/tei/templates/deployment.yaml @@ -38,8 +38,8 @@ spec: - configMapRef: name: {{ include "tei.fullname" . }}-config securityContext: - allowPrivilegeEscalation: false readOnlyRootFilesystem: true + allowPrivilegeEscalation: false {{- if hasKey .Values.securityContext "runAsGroup" }} runAsGroup: {{ .Values.securityContext.runAsGroup }} {{- end }} @@ -54,14 +54,22 @@ spec: seccompProfile: type: RuntimeDefault image: huggingface/downloader:0.17.3 - command: ['sh', '-c'] + command: ['sh', '-ec'] args: - | - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID); - chmod -R g+w /data/models--{{ replace "/" "--" .Values.EMBEDDING_MODEL_ID }} + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.EMBEDDING_MODEL_ID }} ... "; + huggingface-cli download --cache-dir /data {{ .Values.EMBEDDING_MODEL_ID | quote }}; + echo "Change model files mode ..."; + chmod -R g+w /data/models--{{ replace "/" "--" .Values.EMBEDDING_MODEL_ID }}; + # NOTE: Buggy logout command; + # huggingface-cli logout; volumeMounts: - mountPath: /data name: model-volume + - mountPath: /tmp + name: tmp {{- end }} containers: - name: {{ .Chart.Name }} diff --git a/helm-charts/common/teirerank/templates/deployment.yaml b/helm-charts/common/teirerank/templates/deployment.yaml index 3387fad0e..6692d95d7 100644 --- a/helm-charts/common/teirerank/templates/deployment.yaml +++ b/helm-charts/common/teirerank/templates/deployment.yaml @@ -38,8 +38,8 @@ spec: - configMapRef: name: {{ include "teirerank.fullname" . }}-config securityContext: - allowPrivilegeEscalation: false readOnlyRootFilesystem: true + allowPrivilegeEscalation: false {{- if hasKey .Values.securityContext "runAsGroup" }} runAsGroup: {{ .Values.securityContext.runAsGroup }} {{- end }} @@ -54,14 +54,22 @@ spec: seccompProfile: type: RuntimeDefault image: huggingface/downloader:0.17.3 - command: ['sh', '-c'] + command: ['sh', '-ec'] args: - | - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID); - chmod -R g+w /data/models--{{ replace "/" "--" .Values.RERANK_MODEL_ID }} + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.RERANK_MODEL_ID }} ... "; + huggingface-cli download --cache-dir /data {{ .Values.RERANK_MODEL_ID | quote }}; + echo "Change model files mode ..."; + chmod -R g+w /data/models--{{ replace "/" "--" .Values.RERANK_MODEL_ID }}; + # NOTE: Buggy logout command; + # huggingface-cli logout; volumeMounts: - mountPath: /data name: model-volume + - mountPath: /tmp + name: tmp {{- end }} containers: - name: {{ .Chart.Name }} diff --git a/helm-charts/common/tgi/templates/configmap.yaml b/helm-charts/common/tgi/templates/configmap.yaml index 82be971e4..0b7385870 100644 --- a/helm-charts/common/tgi/templates/configmap.yaml +++ b/helm-charts/common/tgi/templates/configmap.yaml @@ -19,6 +19,7 @@ data: no_proxy: {{ .Values.global.no_proxy | quote }} {{- if contains "tgi-gaudi" .Values.image.repository }} HABANA_LOGS: "/tmp/habana_logs" + TRITON_CACHE_DIR: "/tmp/triton_cache" {{- end }} NUMBA_CACHE_DIR: "/tmp" HF_HOME: "/tmp/.cache/huggingface" diff --git a/helm-charts/common/tgi/templates/deployment.yaml b/helm-charts/common/tgi/templates/deployment.yaml index 66d62ddaa..9335234f4 100644 --- a/helm-charts/common/tgi/templates/deployment.yaml +++ b/helm-charts/common/tgi/templates/deployment.yaml @@ -38,8 +38,8 @@ spec: - configMapRef: name: {{ include "tgi.fullname" . }}-config securityContext: - allowPrivilegeEscalation: false readOnlyRootFilesystem: true + allowPrivilegeEscalation: false {{- if hasKey .Values.securityContext "runAsGroup" }} runAsGroup: {{ .Values.securityContext.runAsGroup }} {{- end }} @@ -54,14 +54,22 @@ spec: seccompProfile: type: RuntimeDefault image: huggingface/downloader:0.17.3 - command: ['sh', '-c'] + command: ['sh', '-ec'] args: - | - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID); - chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }} + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.LLM_MODEL_ID }} ... "; + huggingface-cli download --cache-dir /data {{ .Values.LLM_MODEL_ID | quote }}; + echo "Change model files mode ..."; + chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }}; + # NOTE: Buggy logout command; + # huggingface-cli logout; volumeMounts: - mountPath: /data name: model-volume + - mountPath: /tmp + name: tmp {{- end }} containers: - name: {{ .Chart.Name }} @@ -92,6 +100,8 @@ spec: name: shm - mountPath: /tmp name: tmp + - mountPath: /usr/src/out + name: tokenizer ports: - name: http containerPort: {{ .Values.port }} @@ -128,6 +138,8 @@ spec: sizeLimit: {{ .Values.shmSize }} - name: tmp emptyDir: {} + - name: tokenizer + emptyDir: {} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/helm-charts/common/vllm/templates/deployment.yaml b/helm-charts/common/vllm/templates/deployment.yaml index 71aef0290..afa559cd6 100644 --- a/helm-charts/common/vllm/templates/deployment.yaml +++ b/helm-charts/common/vllm/templates/deployment.yaml @@ -35,8 +35,8 @@ spec: - configMapRef: name: {{ include "vllm.fullname" . }}-config securityContext: - allowPrivilegeEscalation: false readOnlyRootFilesystem: true + allowPrivilegeEscalation: false capabilities: drop: - ALL @@ -48,14 +48,22 @@ spec: seccompProfile: type: RuntimeDefault image: huggingface/downloader:0.17.3 - command: ['sh', '-c'] + command: ['sh', '-ec'] args: - | - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.LLM_MODEL_ID | quote }}; + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.LLM_MODEL_ID }} ... "; + huggingface-cli download --cache-dir /data {{ .Values.LLM_MODEL_ID | quote }}; + echo "Change model files mode ..."; chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }} + # NOTE: Buggy logout command; + # huggingface-cli logout; volumeMounts: - mountPath: /data name: model-volume + - mountPath: /tmp + name: tmp {{- end }} containers: - name: {{ .Chart.Name }} diff --git a/helm-charts/common/whisper/templates/deployment.yaml b/helm-charts/common/whisper/templates/deployment.yaml index 4d4503d99..2b965f395 100644 --- a/helm-charts/common/whisper/templates/deployment.yaml +++ b/helm-charts/common/whisper/templates/deployment.yaml @@ -35,8 +35,8 @@ spec: - configMapRef: name: {{ include "whisper.fullname" . }}-config securityContext: - allowPrivilegeEscalation: false readOnlyRootFilesystem: true + allowPrivilegeEscalation: false {{- if hasKey .Values.securityContext "runAsGroup" }} runAsGroup: {{ .Values.securityContext.runAsGroup }} {{- end }} @@ -51,14 +51,22 @@ spec: seccompProfile: type: RuntimeDefault image: huggingface/downloader:0.17.3 - command: ['sh', '-c'] + command: ['sh', '-ec'] args: - | - huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.ASR_MODEL_PATH | quote }}; + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.ASR_MODEL_PATH }} ... "; + huggingface-cli download --cache-dir /data {{ .Values.ASR_MODEL_PATH | quote }}; + echo "Change model files mode ..."; chmod -R g+w /data/models--{{ replace "/" "--" .Values.ASR_MODEL_PATH }} + # NOTE: Buggy logout command; + # huggingface-cli logout; volumeMounts: - mountPath: /data name: model-volume + - mountPath: /tmp + name: tmp {{- end }} containers: - name: {{ .Release.Name }}