diff --git a/deploy.sh b/deploy.sh
index 656bff2..7e535da 100755
--- a/deploy.sh
+++ b/deploy.sh
@@ -2,15 +2,15 @@
 if [ "$1" = "--no-cache" ]; then
     echo "📦️ Building without cache"
-    ssh idsg1 'cd /mnt/um-share-drive/vemonet/libre-chat ; git pull ; docker-compose build --no-cache ; docker-compose down ; docker-compose up --force-recreate -d'
+    ssh idsg1 'cd /mnt/um-share-drive/vemonet/libre-chat ; git pull ; docker compose build --no-cache ; docker compose down ; docker compose up --force-recreate -d'
 else
     echo "♻️ Building with cache"
-    ssh ids2 'cd /mnt/um-share-drive/vemonet/libre-chat ; git pull ; docker-compose up --force-recreate --build -d'
+    ssh idsg1 'cd /mnt/um-share-drive/vemonet/libre-chat ; git pull ; docker compose up --force-recreate --build -d'
 fi
 
 # Build with cache:
-# ssh ids2 'cd /data/deploy-services/knowledge-collaboratory ; git pull ; docker-compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate --build -d'
+# ssh ids2 'cd /data/deploy-services/knowledge-collaboratory ; git pull ; docker compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate --build -d'
 
 # Build without cache:
-# ssh ids2 'cd /data/deploy-services/knowledge-collaboratory ; git pull ; docker-compose -f docker-compose.yml -f docker-compose.prod.yml build ; docker-compose -f docker-compose.yml -f docker-compose.prod.yml down ; docker-compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate -d'
+# ssh ids2 'cd /data/deploy-services/knowledge-collaboratory ; git pull ; docker compose -f docker-compose.yml -f docker-compose.prod.yml build ; docker compose -f docker-compose.yml -f docker-compose.prod.yml down ; docker compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate -d'
diff --git a/docs/docs/use-docker.md b/docs/docs/use-docker.md
index 568fdac..4bb2e0a 100644
--- a/docs/docs/use-docker.md
+++ b/docs/docs/use-docker.md
@@ -1,10 +1,6 @@
 [![Image size](https://ghcr-badge.egpl.dev/vemonet/libre-chat/size)](https://github.com/vemonet/libre-chat/pkgs/container/libre-chat)
 
-Libre Chat is available as a [docker image](https://github.com/vemonet/libre-chat/pkgs/container/libre-chat), it is recommended to use docker for deploying in production as it uses gunicorn to run multiple workers.
-
-!!! Warning "Shared memory for multiple users"
-
-    Memory of the chatbot is shared betweem the users that are on the same worker.
+Libre Chat is available as a [docker image](https://github.com/vemonet/libre-chat/pkgs/container/libre-chat) that will use CUDA when available. It is recommended to use docker for deploying in production, as it uses gunicorn to run multiple workers.
 
 ## ⚡ Quickstart
 
@@ -14,6 +10,11 @@ If you just want deploy it using the pre-trained Mixtral model, you can use dock
 ```
 docker run -it -p 8000:8000 ghcr.io/vemonet/libre-chat:main
 ```
+!!! Warning "Loading the model takes time"
+
+    Downloading the model will take some time on the first run; you can also pre-download it manually.
+    If you are using a GPU, loading the model when the application starts also takes some time (it can take a few minutes).
+
 ## ⚙️ Configure with docker compose
 
 1. Create a `chat.yml` file with your chat web service configuration.
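For context on the command change in deploy.sh: Compose V2 ships as a Docker CLI plugin invoked as `docker compose`, replacing the standalone `docker-compose` binary. Only the command name changes; arguments passed to `-f` are ordinary file paths and keep their hyphenated `docker-compose.yml` spelling. A minimal before/after sketch, reusing the flags from the commented commands above:

```bash
# Compose V1: standalone binary, hyphenated command
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate --build -d

# Compose V2: CLI plugin, space-separated command; the -f arguments are
# file paths and must keep the literal "docker-compose.yml" name
docker compose -f docker-compose.yml -f docker-compose.prod.yml up --force-recreate --build -d
```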
diff --git a/src/libre_chat/llm.py b/src/libre_chat/llm.py
index 67bbebd..0f6d0c5 100644
--- a/src/libre_chat/llm.py
+++ b/src/libre_chat/llm.py
@@ -226,6 +226,7 @@ def build_vectorstore(self, documents_path: Optional[str] = None) -> Optional[Qd
         embeddings = HuggingFaceEmbeddings(
             model_name=self.conf.vector.embeddings_path, model_kwargs={"device": self.device}
         )
+        os.makedirs(self.conf.vector.vector_path, exist_ok=True)
         vectorstore = Qdrant.from_documents(
             splitted_texts,
             embeddings,
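The added `os.makedirs` call ensures the on-disk location for the local Qdrant collection exists before the vectorstore is written. A minimal standalone sketch of the same pattern, assuming LangChain's `Qdrant` wrapper in local `path` mode; the paths, model name, and collection name below are illustrative stand-ins for the `self.conf.vector.*` settings, not values from the source:

```python
import os

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Qdrant

# Illustrative stand-ins for the conf.vector.* settings used in llm.py
vector_path = "./vectorstore/db"
embeddings_path = "sentence-transformers/all-MiniLM-L6-v2"

# The fix: create the target directory up front; exist_ok=True makes this
# idempotent, so rebuilding the vectorstore does not raise FileExistsError
os.makedirs(vector_path, exist_ok=True)

embeddings = HuggingFaceEmbeddings(model_name=embeddings_path, model_kwargs={"device": "cpu"})
docs = [Document(page_content="Libre Chat is a chat assistant.")]

# Embedded (local) Qdrant persisted under vector_path
vectorstore = Qdrant.from_documents(docs, embeddings, path=vector_path, collection_name="documents")
```

Since `exist_ok=True` makes the call a no-op when the directory is already there, it is safe to run on every startup rather than only on first deployment.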