Skip to content

Commit

Permalink
Merge pull request #72 from datakind/feat/selenium-e2e-3
Browse files Browse the repository at this point in the history
Migration of tests to use Azure instead of OpenAI
  • Loading branch information
dividor authored Jul 12, 2024
2 parents 34ca22d + 67aaa3e commit 0ae8e8f
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 71 deletions.
39 changes: 12 additions & 27 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,15 @@ RECIPE_DB_CONN_STRING=postgresql://${POSTGRES_RECIPE_USER}:${POSTGRES_RECIPE_PAS
# These control how recipes are retrieved and generated using LLMs.
#
# If you are using Azure OpenAI. Note, in Playground in Azure, you can 'View code' to get these
#RECIPES_OPENAI_API_TYPE=azure
#RECIPES_OPENAI_API_KEY=
#RECIPES_OPENAI_API_ENDPOINT=<eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
#RECIPES_OPENAI_API_VERSION=2024-05-01-preview
#RECIPES_MODEL=<The deployment name you created in Azure, eg gpt-4o>
RECIPES_OPENAI_API_TYPE=<azure or openai>
RECIPES_OPENAI_API_KEY=<API Key>
RECIPES_OPENAI_API_ENDPOINT=<only for Azure, eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
RECIPES_OPENAI_API_VERSION=<only for Azure, eg 2024-02-15-preview>
RECIPES_MODEL=<On OpenAI, the model name; on Azure, the deployment name you created in Azure, eg gpt-4o>
#
# Leave these as-is for quick start
#RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME=text-embedding-ada-002
#RECIPES_BASE_URL=${RECIPES_OPENAI_API_ENDPOINT}

# OpenAI example
RECIPES_OPENAI_API_TYPE=openai
RECIPES_OPENAI_API_KEY=
RECIPES_MODEL=gpt-4o
RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME=text-embedding-ada-002
RECIPES_BASE_URL=${RECIPES_OPENAI_API_ENDPOINT}

# Variables that control matching of recipes
# Memory cutoff for the AI model (lower is more similar)
Expand Down Expand Up @@ -74,25 +68,17 @@ HAPI_API_TOKEN=
# Parameters for the AI assistant used in the chat interface, to serve recipes and carry out
# on-the-fly-analysis
#
# # If you are using Azure OpenAI. Note, in Playground in Azure, you can 'View code' to get these
#ASSISTANTS_API_TYPE=azure
#ASSISTANTS_API_KEY=<API Key as found on the Azure OpenAI resource>
#ASSISTANTS_ID=<ID of the assistant you created in OpenAI. Leave blank if you do not have one yet>
#ASSISTANTS_BASE_URL=<eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
#ASSISTANTS_API_VERSION=2024-05-01-preview
#ASSISTANTS_MODEL=<The deployment name of the model you created in Azure which the assistant uses, eg gpt-4o>
#ASSISTANTS_BOT_NAME=<Your assistant name, eg "Humanitarian AI Assistant">

# ======= START: OpenAI (ie not Azure) =================
ASSISTANTS_API_TYPE=openai
OPENAI_API_KEY=<The API key you created on OpenAI>
ASSISTANTS_API_TYPE=<azure or openai>
ASSISTANTS_API_KEY=<API Key as found on the Azure OpenAI resource>
ASSISTANTS_ID=<ID of the assistant you created in OpenAI. Leave blank if you do not have one yet>
ASSISTANTS_MODEL=<The model your assistant uses>
ASSISTANTS_BASE_URL=<for Azure only, eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
ASSISTANTS_API_VERSION=<For Azure only, eg 2024-02-15-preview>
ASSISTANTS_MODEL=<On OpenAI, the model name; on Azure, the deployment name of the model you created in Azure which the assistant uses, eg gpt-4o>
ASSISTANTS_BOT_NAME=<Your assistant name, eg "Humanitarian AI Assistant">

# Leave as-is
ASSISTANTS_API_KEY=${OPENAI_API_KEY}
ASSISTANTS_BASE_URL=""

#==================================================#
# Deployments Settings #
#==================================================#
Expand Down Expand Up @@ -124,4 +110,3 @@ LITERAL_API_KEY=
CHAINLIT_AUTH_SECRET="1R_FKRaiv0~5bqoQurBx34ctOD8kM%a=YvIx~fVmYLVd>B5vWa>e9rDX?6%^iCOv"
USER_LOGIN=muppet-data-chef
USER_PASSWORD=hB%1b36!!8-v

54 changes: 45 additions & 9 deletions .github/workflows/e2e_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
runs-on: ubuntu-latest
environment: "GitHub Actions 1"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_KEY: ${{ secrets.ASSISTANTS_API_KEY }}
ASSISTANTS_API_VERSION: ${{ secrets.ASSISTANTS_API_VERSION }}
ASSISTANTS_API_TYPE: ${{ secrets.ASSISTANTS_API_TYPE }}
ASSISTANTS_ID: ${{ secrets.ASSISTANTS_ID }}
ASSISTANTS_BASE_URL: ${{ secrets.ASSISTANTS_BASE_URL }}
Expand All @@ -41,14 +41,18 @@ jobs:
RECIPES_OPENAI_API_KEY: ${{ secrets.RECIPES_OPENAI_API_KEY }}
RECIPES_MODEL: ${{ secrets.RECIPES_MODEL }}
RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME: ${{ secrets.RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME }}
RECIPES_OPENAI_API_ENDPOINT: ${{ secrets.RECIPES_OPENAI_API_ENDPOINT }}
RECIPES_OPENAI_API_VERSION: ${{ secrets.RECIPES_OPENAI_API_VERSION }}
RECIPES_BASE_URL: ${{ secrets.RECIPES_BASE_URL }}

RECIPES_MEMORY_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_MEMORY_SIMILARITY_CUTOFF }}
RECIPES_RECIPE_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_RECIPE_SIMILARITY_CUTOFF }}
RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF }}
RECIPES_MODEL_TEMP: ${{ secrets.RECIPES_MODEL_TEMP }}
RECIPES_MODEL_MAX_TOKENS: ${{ secrets.RECIPES_MODEL_MAX_TOKENS }}

IMAGE_HOST: ${{ secrets.IMAGE_HOST }}
RECIPE_SERVER_API: ${{ secrets.RECIPE_SERVER_API_FROM_GH_HOST }}
RECIPE_SERVER_API: ${{ secrets.RECIPE_SERVER_API }}
CHAT_URL: ${{ secrets.CHAT_URL }}

CHAINLIT_AUTH_SECRET: ${{ secrets.CHAINLIT_AUTH_SECRET }}
Expand Down Expand Up @@ -77,11 +81,27 @@ jobs:
# TODO docker-compose files should be refactored to use scopes instead of different versions for each environment
echo "Starting docker containers for dbs and server ..."
docker-compose pull
docker-compose up -d --build
docker-compose up -d --build datadb recipedb server chat
# TODO: For some reason, maybe buildkit, in Github docker compose builds the image differently, and it doesn't work. Individual image build works.
docker build --build-arg OPENAI_API_KEY=$OPENAI_API_KEY --build-arg CHAT_URL=$CHAT_URL --build-arg OPENAI_API_ENDPOINT=$OPENAI_API_ENDPOINT --no-cache -t promptflow -f ./flows/chainlit-ui-evaluation/Dockerfile .
docker run --env RECIPES_MODEL_MAX_TOKENS=${RECIPES_MODEL_MAX_TOKENS} --env RECIPES_MODEL_TEMP=${RECIPES_MODEL_TEMP} --env RECIPES_OPENAI_API_TYPE=${ASSISTANTS_API_TYPE} --env RECIPES_OPENAI_API_KEY=${ASSISTANTS_API_KEY} --env RECIPES_MODEL=${RECIPES_MODEL} --env RECIPES_BASE_URL=${RECIPES_BASE_URL} --env USER_LOGIN=${USER_LOGIN} --env USER_PASSWORD=${USER_PASSWORD} --env CHAT_URL=${CHAT_URL} --network=data-recipes-ai_default -d --name promptflow promptflow
docker build --build-arg OPENAI_API_KEY=$ASSISTANTS_API_KEY \
--build-arg OPENAI_API_ENDPOINT=$ASSISTANTS_BASE_URL \
--build-arg OPENAI_API_VERSION=$ASSISTANTS_API_VERSION \
--build-arg CHAT_URL=$CHAT_URL \
--build-arg OPENAI_API_ENDPOINT=$ASSISTANTS_BASE_URL \
--no-cache -t promptflow -f ./flows/chainlit-ui-evaluation/Dockerfile .
docker run --env RECIPES_MODEL_MAX_TOKENS=${RECIPES_MODEL_MAX_TOKENS} \
--env RECIPES_MODEL_TEMP=${RECIPES_MODEL_TEMP} \
--env RECIPES_OPENAI_API_TYPE=${RECIPES_OPENAI_API_TYPE} \
--env RECIPES_OPENAI_API_KEY=${RECIPES_OPENAI_API_KEY} \
--env RECIPES_OPENAI_API_VERSION=${ASSISTANTS_API_VERSION} \
--env RECIPES_MODEL=${RECIPES_MODEL} \
--env RECIPES_BASE_URL=${RECIPES_BASE_URL} \
--env USER_LOGIN=${USER_LOGIN} \
--env USER_PASSWORD=${USER_PASSWORD} \
--env CHAT_URL=${CHAT_URL} \
--network=data-recipes-ai_default -d --name promptflow promptflow
- name: Check logs
run: |
Expand Down Expand Up @@ -115,14 +135,30 @@ jobs:
# uses: lhotari/action-upterm@v1

#- name: DEBUG - Run Selenium outside of promptflow
# run: |
# docker exec promptflow python call_assistant.py
# run: |
# docker exec promptflow python call_assistant.py

- name: Run tests
- name: Run tests
run: |
env > .env
docker exec promptflow pf run create --flow . --data ./data.jsonl --stream --column-mapping query='${data.query}' context='${data.context}' chat_history='${data.chat_history}' --name base_run
- name: Check logs post-tests
run: |
docker ps
echo "logs datadb ..."
docker compose logs datadb
echo "logs promptflow ..."
docker logs promptflow
echo "logs chat ..."
docker compose logs chat
echo "logs server ..."
docker compose logs server
- name: Show results
run: |
docker exec promptflow pf run show-details -n base_run
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/get_memory_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_TYPE: ${{ secrets.ASSISTANTS_API_TYPE }}
ASSISTANTS_API_VERSION: ${{ secrets.ASSISTANTS_API_VERSION }}
ASSISTANTS_ID: ${{ secrets.ASSISTANTS_ID }}
ASSISTANTS_BASE_URL: ${{ secrets.ASSISTANTS_BASE_URL }}
ASSISTANTS_MODEL: ${{ secrets.ASSISTANTS_MODEL }}
Expand All @@ -30,11 +31,15 @@ jobs:
POSTGRES_RECIPE_PASSWORD: ${{ secrets.POSTGRES_RECIPE_PASSWORD }}

RECIPE_DB_CONN_STRING: "postgresql://${{ secrets.POSTGRES_RECIPE_USER }}:${{ secrets.POSTGRES_RECIPE_PASSWORD }}@${{ secrets.POSTGRES_RECIPE_HOST }}:${{ secrets.POSTGRES_RECIPE_PORT }}/${{ secrets.POSTGRES_RECIPE_DB }}"

RECIPES_BASE_URL: ${{ secrets.RECIPES_BASE_URL }}
RECIPES_OPENAI_API_TYPE: ${{ secrets.RECIPES_OPENAI_API_TYPE }}
RECIPES_OPENAI_API_KEY: ${{ secrets.RECIPES_OPENAI_API_KEY }}
RECIPES_MODEL: ${{ secrets.RECIPES_MODEL }}
RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME: ${{ secrets.RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME }}
RECIPES_OPENAI_API_ENDPOINT: ${{ secrets.RECIPES_OPENAI_API_ENDPOINT }}
RECIPES_OPENAI_API_VERSION: ${{ secrets.RECIPES_OPENAI_API_VERSION }}


RECIPES_MEMORY_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_MEMORY_SIMILARITY_CUTOFF }}
RECIPES_RECIPE_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_RECIPE_SIMILARITY_CUTOFF }}
RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF }}
Expand All @@ -48,8 +53,6 @@ jobs:
USER_LOGIN: ${{ secrets.USER_LOGIN }}
USER_PASSWORD: ${{ secrets.USER_PASSWORD }}

COLUMNS: 150

steps:

- name: Checkout
Expand Down Expand Up @@ -102,5 +105,5 @@ jobs:
- name: Run tests
run: |
echo "exec into container ..."
docker compose exec server bash -c "cd tests/ && pytest"
docker compose exec server bash -c "cd tests/ && pytest -vv"
20 changes: 19 additions & 1 deletion CONTRIBUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,18 @@ Then ...
`Dev Containers: Attach to remote container`.

Select the promptflow container. This opens a new VSCode window - use it for the next steps.
4. Install Promptflow add-in
4. It should happen automatically, but if not, install Promptflow add-in
5. Open folder `/app`
6. Click on `flow.dag.yaml`
7. Top left of main pane, click on 'Visual editor'
- If you are taken to the promptflow 'Install dependencies' screen, change the Python runtime to be ` /azureml-envs/prompt-flow/runtime/bin/python` 'runtime', then close and re-open `flow.dag.yaml`
8. On the Groundedness node, select your new connection
9. You can now run by clicking the play icon. See Promptflow documentation for more details

#### Changing between Azure OpenAI <> OpenAI

As noted in the README, the repo supports assistants on OpenAI or Azure OpenAI. The README has instructions on how to change in the `.env` file, but you will also have to change the connection in the promptflow groundedness node accordingly.

## GitHub Workflow

As many other open source projects, we use the famous
Expand Down Expand Up @@ -187,6 +191,20 @@ Then ...
8. On the Groundedness node, select your new connection
9. You can now run by clicking the play icon. See Promptflow documentation for more details

# Adding new Data sources

## Open API (not OpenAI!) data sources

As mentioned in the main [README](README.md), the assistant can be used with openapi standard APIs, such as the included HDX API. To add another, extend the configuration in [ingestion/ingest.config](ingestion/ingest.config). The ingestion script will process this data and import it into the Data Recipes AI database. This works for simple APIs with relatively low data volumes, and may need some adjustment depending on the complexity of the API.

## API interaction without ingestion

Some APIs are too extensive to ingest. These can be defined as tools (functions) for the assistant, which can query the API on request to get data. See [assistants/recipes_agents/create_update_assistant.py](assistants/recipes_agents/create_update_assistant.py) which already has a couple of functions which integrate with APIs which you could extend for new data sources.

## Files for the assistant

As mentioned in the main [README](README.md), the assistant can be provided your data in the form of data files (eg CSV, Excel) and documents (eg PDF and word). These are available to the assistant for all interactions. Additionally, users can upload files during conversation. In both cases analysis is done by the LLM assistant and should be treated with caution.

# Deployment

We will add more details here soon, for now, here are some notes on Azure ...
Expand Down
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ This repo contains a docker-compose environment that will run the following comp
- (Azure) Open AI Assistant creation tools to create assistants that are aware of the data sources available in the data recipes ai environment
- Autogen studio agent team for helping creating recipes [ In progress ]

# What features are supported?

- Ability to create data recipes using LLMs, these can be served to end users via chat
- Ability for end users to access memories and recipes using a chat interface. Memories will present saved results, recipes will run to get latest results
- Ingestion of openapi standard datasets to a database to enable conversational data analysis using Text-To-SQL
- Ability to provide the assistant documents (eg PDF, DOCX) and datafiles (eg CSV, XLSX) for analysis in all chats
- Ability for user to upload their own documents for analysis
- Attribution and footers to indicate where caution is needed on LLM analysis, versus a recipe a user created

# Quick start

1. Install Docker if you don't have it already, see [here](https://www.docker.com/products/docker-desktop/)
Expand Down Expand Up @@ -89,6 +98,8 @@ This repo contains a docker-compose environment that will run the following comp
ASSISTANTS_BOT_NAME=<Your assistant name, eg "Humanitarian AI Assistant">
```
Be aware that lower-power models such as GPT-3.5-Turbo can serve recipes and carry out basic chat, but perform poorly for analysis and code generation.
Not needed for quick start, but if you want to run ingestion of data with the new HDX API, then you will need to set ...
`HAPI_API_TOKEN=<See https://hdx-hapi.readthedocs.io/en/latest/getting-started/>`
Expand All @@ -113,7 +124,7 @@ This repo contains a docker-compose environment that will run the following comp
Make note of the assistant ID, then edit your `.env` file and use it to set the variable `ASSISTANTS_ID`.
Note: (i) If you rerun `create_update_assistant.py` once `ASSISTANTS_ID` is set, the script will update the assistant rather than create a new one; (ii) You can also add your own data, pdf, docx, csv, xlsx files for the assistant to use, see section 'Adding your own files for the assistant to analyze' below.
Note: (i) If you rerun `create_update_assistant.py` once `ASSISTANTS_ID` is set, the script will update the assistant rather than create a new one. You will need to do this if trying different models; (ii) You can also add your own data, pdf, docx, csv, xlsx files for the assistant to use, see section 'Adding your own files for the assistant to analyze' below.
7. Restart so the assistant ID is set, `docker compose up -d`
Expand All @@ -127,7 +138,6 @@ The steps above are mostly one-time. Going forward you only need to stop and sta
- To start the environment `docker compose up -d`, then go to [http://localhost:8000/](http://localhost:8000/)
- To start with rebuild `docker compose up -d --build` (for more details about development, see [CONTRIBUTION](CONTRIBUTION.md))
## Using Recipes
We are in a phase of research to identify and improve recipes, but for now the system comes with some basic examples to illustrate. To find out the list, enter "Get all recipes" in the chat interface.
Expand Down Expand Up @@ -157,6 +167,14 @@ Note: By default, rerunning the ingestion will not download data if the file alr
`docker compose exec ingestion python ingest.py --force_download`
### Analysis on Uploaded files
As mentioned below, it is possible to add files the assistant can use on its creation. These can be used for all chats.
Additionally, you can upload CSV and Excel files for LLM-powered analysis, as well as documents. Please note though, this is LLM analysis rather than recipes and the results should be treated with caution.
#### Running ingestion without running full environment
If you want to *just* download data and not run the full environment, this is possible as follows:
Expand All @@ -172,6 +190,10 @@ Then run ingestion in download only mode ...
5. `python ingest.py --skip_processing --skip_uploading`
#### Adding new data sources
To add new ingestion data sources, please refer to [CONTRIBUTION](CONTRIBUTION.md)
# Managing recipes
The management of recipes is part of the human in the loop approach of this repo. New recipes are created in status pending and only get marked as approved, once they have been verified by a recipe manager. Recipe managers can 'check out' recipes from the database into their local development environment such as VS Code to run, debug, and edit the recipes, before checking them back in. To make this process platform independent, recipes are checked out into a docker container, which can be used as the runtime environment to run the recipes via VSCode.
Expand Down
3 changes: 2 additions & 1 deletion docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ services:
context: .
dockerfile: ./flows/chainlit-ui-evaluation/Dockerfile
args:
OPENAI_API_KEY: ${OPENAI_API_KEY}
OPENAI_API_KEY: ${ASSISTANTS_API_KEY}
OPENAI_API_ENDPOINT: ${ASSISTANTS_BASE_URL}
OPENAI_API_VERSION: ${ASSISTANTS_API_VERSION}
container_name: recipes-ai-promptflow
env_file:
- .env
Expand Down
5 changes: 4 additions & 1 deletion flows/chainlit-ui-evaluation/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ ENV OPENAI_API_KEY=$OPENAI_API_KEY
ARG OPENAI_API_ENDPOINT
ENV OPENAI_API_ENDPOINT=$OPENAI_API_ENDPOINT

ARG OPENAI_API_VERSION
ENV OPENAI_API_VERSION=$OPENAI_API_VERSION

ARG CHAT_URL
ENV CHAT_URL=$CHAT_URL

Expand Down Expand Up @@ -48,7 +51,7 @@ RUN pip3 install keyrings.alt

# Set up Connections
RUN pf connection create --file ./openai.yaml --set api_key=$OPENAI_API_KEY --name open_ai_connection
RUN pf connection create --file ./azure_openai.yaml --set api_key=$OPENAI_API_KEY --set api_base=$OPENAI_API_ENDPOINT --name azure_open_ai_connection
RUN pf connection create --file ./azure_openai.yaml --set api_version=$OPENAI_API_VERSION --set api_key=$OPENAI_API_KEY --set api_base=$OPENAI_API_ENDPOINT --name azure_open_ai_connection

RUN echo "DEBUG DOCKER"
RUN which python
Expand Down
3 changes: 2 additions & 1 deletion flows/chainlit-ui-evaluation/azure_openai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ name: azure_open_ai_connection
type: azure_open_ai
api_key: "<user-input>"
api_base: "<user-input>"
api_type: "azure"
api_type: "azure"
api_version: "<user-input>"
Loading

0 comments on commit 0ae8e8f

Please sign in to comment.