Skip to content

Commit

Permalink
Merge pull request #72 from datakind/feat/selenium-e2e-3
Browse files Browse the repository at this point in the history
Migration of tests to use Azure instead of OpenAI
  • Loading branch information
dividor authored Jul 12, 2024
2 parents 34ca22d + 67aaa3e commit 0ae8e8f
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 71 deletions.
39 changes: 12 additions & 27 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,15 @@ RECIPE_DB_CONN_STRING=postgresql://${POSTGRES_RECIPE_USER}:${POSTGRES_RECIPE_PAS
# These control how recipes are retrieved and generated using LLMs.
#
# If you are using Azure OpenAI. Note, in Playground in Azure, you can 'View code' to get these
#RECIPES_OPENAI_API_TYPE=azure
#RECIPES_OPENAI_API_KEY=
#RECIPES_OPENAI_API_ENDPOINT=<eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
#RECIPES_OPENAI_API_VERSION=2024-05-01-preview
#RECIPES_MODEL=<The deployment name you created in Azure, eg gpt-4o>
RECIPES_OPENAI_API_TYPE=<azure or openai>
RECIPES_OPENAI_API_KEY=<API Key>
RECIPES_OPENAI_API_ENDPOINT=<only for Azure, eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
RECIPES_OPENAI_API_VERSION=<only for Azure, eg 2024-02-15-preview>
RECIPES_MODEL=<On OpenAI, the model name; on Azure, the deployment name you created in Azure, eg gpt-4o>
#
# Leave these as-is for quick start
#RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME=text-embedding-ada-002
#RECIPES_BASE_URL=${RECIPES_OPENAI_API_ENDPOINT}

# OpenAI example
RECIPES_OPENAI_API_TYPE=openai
RECIPES_OPENAI_API_KEY=
RECIPES_MODEL=gpt-4o
RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME=text-embedding-ada-002
RECIPES_BASE_URL=${RECIPES_OPENAI_API_ENDPOINT}

# Variables that control matching of recipes
# Memory cutoff for the AI model (lower is more similar)
Expand Down Expand Up @@ -74,25 +68,17 @@ HAPI_API_TOKEN=
# Parameters for the AI assistant used in the chat interface, to serve recipes and carry out
# on-the-fly-analysis
#
# # If you are using Azure OpenAI. Note, in Playground in Azure, you can 'View code' to get these
#ASSISTANTS_API_TYPE=azure
#ASSISTANTS_API_KEY=<API Key as found on the Azure OpenAI resource>
#ASSISTANTS_ID=<ID of the assistant you created in OpenAI. Leave blank if you do not have one yet>
#ASSISTANTS_BASE_URL=<eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
#ASSISTANTS_API_VERSION=2024-05-01-preview
#ASSISTANTS_MODEL=<The deployment name of the model you created in Azure which the assistant uses, eg gpt-4o>
#ASSISTANTS_BOT_NAME=<Your assistant name, eg "Humanitarian AI Assistant">

# ======= START: OpenAI (ie not Azure) =================
ASSISTANTS_API_TYPE=openai
OPENAI_API_KEY=<The API key you created on OpenAI>
ASSISTANTS_API_TYPE=<azure or openai>
ASSISTANTS_API_KEY=<API Key as found on the Azure OpenAI resource>
ASSISTANTS_ID=<ID of the assistant you created in OpenAI. Leave blank if you do not have one yet>
ASSISTANTS_MODEL=<The model your assistant uses>
ASSISTANTS_BASE_URL=<for Azure only, eg https://<YOUR DEPLOYMENT NAME>.openai.azure.com/>
ASSISTANTS_API_VERSION=<For Azure only, eg 2024-02-15-preview>
ASSISTANTS_MODEL=<On OpenAI, the model name; on Azure, the deployment name of the model you created in Azure which the assistant uses, eg gpt-4o>
ASSISTANTS_BOT_NAME=<Your assistant name, eg "Humanitarian AI Assistant">

# Leave as-is
ASSISTANTS_API_KEY=${OPENAI_API_KEY}
ASSISTANTS_BASE_URL=""

#==================================================#
# Deployments Settings #
#==================================================#
Expand Down Expand Up @@ -124,4 +110,3 @@ LITERAL_API_KEY=
CHAINLIT_AUTH_SECRET="1R_FKRaiv0~5bqoQurBx34ctOD8kM%a=YvIx~fVmYLVd>B5vWa>e9rDX?6%^iCOv"
USER_LOGIN=muppet-data-chef
USER_PASSWORD=hB%1b36!!8-v

54 changes: 45 additions & 9 deletions .github/workflows/e2e_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
runs-on: ubuntu-latest
environment: "GitHub Actions 1"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_KEY: ${{ secrets.ASSISTANTS_API_KEY }}
ASSISTANTS_API_VERSION: ${{ secrets.ASSISTANTS_API_VERSION }}
ASSISTANTS_API_TYPE: ${{ secrets.ASSISTANTS_API_TYPE }}
ASSISTANTS_ID: ${{ secrets.ASSISTANTS_ID }}
ASSISTANTS_BASE_URL: ${{ secrets.ASSISTANTS_BASE_URL }}
Expand All @@ -41,14 +41,18 @@ jobs:
RECIPES_OPENAI_API_KEY: ${{ secrets.RECIPES_OPENAI_API_KEY }}
RECIPES_MODEL: ${{ secrets.RECIPES_MODEL }}
RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME: ${{ secrets.RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME }}
RECIPES_OPENAI_API_ENDPOINT: ${{ secrets.RECIPES_OPENAI_API_ENDPOINT }}
RECIPES_OPENAI_API_VERSION: ${{ secrets.RECIPES_OPENAI_API_VERSION }}
RECIPES_BASE_URL: ${{ secrets.RECIPES_BASE_URL }}

RECIPES_MEMORY_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_MEMORY_SIMILARITY_CUTOFF }}
RECIPES_RECIPE_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_RECIPE_SIMILARITY_CUTOFF }}
RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF }}
RECIPES_MODEL_TEMP: ${{ secrets.RECIPES_MODEL_TEMP }}
RECIPES_MODEL_MAX_TOKENS: ${{ secrets.RECIPES_MODEL_MAX_TOKENS }}

IMAGE_HOST: ${{ secrets.IMAGE_HOST }}
RECIPE_SERVER_API: ${{ secrets.RECIPE_SERVER_API_FROM_GH_HOST }}
RECIPE_SERVER_API: ${{ secrets.RECIPE_SERVER_API }}
CHAT_URL: ${{ secrets.CHAT_URL }}

CHAINLIT_AUTH_SECRET: ${{ secrets.CHAINLIT_AUTH_SECRET }}
Expand Down Expand Up @@ -77,11 +81,27 @@ jobs:
# TODO docker-compose files should be refactored to use scopes instead of different versions for each environment
echo "Starting docker containers for dbs and server ..."
docker-compose pull
docker-compose up -d --build
docker-compose up -d --build datadb recipedb server chat
# TODO: For some reason, maybe buildkit, in Github docker compose builds the image differently, and it doesn't work. Individual image build works.
docker build --build-arg OPENAI_API_KEY=$OPENAI_API_KEY --build-arg CHAT_URL=$CHAT_URL --build-arg OPENAI_API_ENDPOINT=$OPENAI_API_ENDPOINT --no-cache -t promptflow -f ./flows/chainlit-ui-evaluation/Dockerfile .
docker run --env RECIPES_MODEL_MAX_TOKENS=${RECIPES_MODEL_MAX_TOKENS} --env RECIPES_MODEL_TEMP=${RECIPES_MODEL_TEMP} --env RECIPES_OPENAI_API_TYPE=${ASSISTANTS_API_TYPE} --env RECIPES_OPENAI_API_KEY=${ASSISTANTS_API_KEY} --env RECIPES_MODEL=${RECIPES_MODEL} --env RECIPES_BASE_URL=${RECIPES_BASE_URL} --env USER_LOGIN=${USER_LOGIN} --env USER_PASSWORD=${USER_PASSWORD} --env CHAT_URL=${CHAT_URL} --network=data-recipes-ai_default -d --name promptflow promptflow
docker build --build-arg OPENAI_API_KEY=$ASSISTANTS_API_KEY \
--build-arg OPENAI_API_ENDPOINT=$ASSISTANTS_BASE_URL \
--build-arg OPENAI_API_VERSION=$ASSISTANTS_API_VERSION \
--build-arg CHAT_URL=$CHAT_URL \
--build-arg OPENAI_API_ENDPOINT=$ASSISTANTS_BASE_URL \
--no-cache -t promptflow -f ./flows/chainlit-ui-evaluation/Dockerfile .
docker run --env RECIPES_MODEL_MAX_TOKENS=${RECIPES_MODEL_MAX_TOKENS} \
--env RECIPES_MODEL_TEMP=${RECIPES_MODEL_TEMP} \
--env RECIPES_OPENAI_API_TYPE=${RECIPES_OPENAI_API_TYPE} \
--env RECIPES_OPENAI_API_KEY=${RECIPES_OPENAI_API_KEY} \
--env RECIPES_OPENAI_API_VERSION=${ASSISTANTS_API_VERSION} \
--env RECIPES_MODEL=${RECIPES_MODEL} \
--env RECIPES_BASE_URL=${RECIPES_BASE_URL} \
--env USER_LOGIN=${USER_LOGIN} \
--env USER_PASSWORD=${USER_PASSWORD} \
--env CHAT_URL=${CHAT_URL} \
--network=data-recipes-ai_default -d --name promptflow promptflow
- name: Check logs
run: |
Expand Down Expand Up @@ -115,14 +135,30 @@ jobs:
# uses: lhotari/action-upterm@v1

#- name: DEBUG - Run Selenium outside of promptflow
# run: |
# docker exec promptflow python call_assistant.py
# run: |
# docker exec promptflow python call_assistant.py

- name: Run tests
- name: Run tests
run: |
env > .env
docker exec promptflow pf run create --flow . --data ./data.jsonl --stream --column-mapping query='${data.query}' context='${data.context}' chat_history='${data.chat_history}' --name base_run
- name: Check logs post-tests
run: |
docker ps
echo "logs datadb ..."
docker compose logs datadb
echo "logs promptflow ..."
docker logs promptflow
echo "logs chat ..."
docker compose logs chat
echo "logs server ..."
docker compose logs server
- name: Show results
run: |
docker exec promptflow pf run show-details -n base_run
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/get_memory_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ASSISTANTS_API_TYPE: ${{ secrets.ASSISTANTS_API_TYPE }}
ASSISTANTS_API_VERSION: ${{ secrets.ASSISTANTS_API_VERSION }}
ASSISTANTS_ID: ${{ secrets.ASSISTANTS_ID }}
ASSISTANTS_BASE_URL: ${{ secrets.ASSISTANTS_BASE_URL }}
ASSISTANTS_MODEL: ${{ secrets.ASSISTANTS_MODEL }}
Expand All @@ -30,11 +31,15 @@ jobs:
POSTGRES_RECIPE_PASSWORD: ${{ secrets.POSTGRES_RECIPE_PASSWORD }}

RECIPE_DB_CONN_STRING: "postgresql://${{ secrets.POSTGRES_RECIPE_USER }}:${{ secrets.POSTGRES_RECIPE_PASSWORD }}@${{ secrets.POSTGRES_RECIPE_HOST }}:${{ secrets.POSTGRES_RECIPE_PORT }}/${{ secrets.POSTGRES_RECIPE_DB }}"

RECIPES_BASE_URL: ${{ secrets.RECIPES_BASE_URL }}
RECIPES_OPENAI_API_TYPE: ${{ secrets.RECIPES_OPENAI_API_TYPE }}
RECIPES_OPENAI_API_KEY: ${{ secrets.RECIPES_OPENAI_API_KEY }}
RECIPES_MODEL: ${{ secrets.RECIPES_MODEL }}
RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME: ${{ secrets.RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME }}
RECIPES_OPENAI_API_ENDPOINT: ${{ secrets.RECIPES_OPENAI_API_ENDPOINT }}
RECIPES_OPENAI_API_VERSION: ${{ secrets.RECIPES_OPENAI_API_VERSION }}


RECIPES_MEMORY_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_MEMORY_SIMILARITY_CUTOFF }}
RECIPES_RECIPE_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_RECIPE_SIMILARITY_CUTOFF }}
RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF }}
Expand All @@ -48,8 +53,6 @@ jobs:
USER_LOGIN: ${{ secrets.USER_LOGIN }}
USER_PASSWORD: ${{ secrets.USER_PASSWORD }}

COLUMNS: 150

steps:

- name: Checkout
Expand Down Expand Up @@ -102,5 +105,5 @@ jobs:
- name: Run tests
run: |
echo "exec into container ..."
docker compose exec server bash -c "cd tests/ && pytest"
docker compose exec server bash -c "cd tests/ && pytest -vv"
20 changes: 19 additions & 1 deletion CONTRIBUTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,18 @@ Then ...
`Dev Containers: Attach to remote container`.

Select the promptflow container. This opens a new VSCode window - use it for the next steps.
4. Install Promptflow add-in
4. It should happen automatically, but if not, install Promptflow add-in
5. Open folder `/app`
6. Click on `flow.dag.yaml`
7. Top left of main pane, click on 'Visual editor'
- If you are taken to the promptflow 'Install dependencies' screen, change the Python runtime to be ` /azureml-envs/prompt-flow/runtime/bin/python` 'runtime', then close and re-open `flow.dag.yaml`
8. On the Groundedness node, select your new connection
9. You can now run by clicking the play icon. See Promptflow documentation for more details

#### Changing between Azure OpenAI <> OpenAI

As noted in the README, the repo supports assistants on OpenAI or Azure OpenAI. The README has instructions on how to change in the `.env` file, but you will also have to change the connection in the promptflow groundedness node accordingly.

## GitHub Workflow

As many other open source projects, we use the famous
Expand Down Expand Up @@ -187,6 +191,20 @@ Then ...
8. On the Groundedness node, select your new connection
9. You can now run by clicking the play icon. See Promptflow documentation for more details

# Adding new Data sources

## Open API (not OpenAI!) data sources

As mentioned in the main [README](README.md), the assistant can be used with openapi standard APIs, such as the included HDX API. To add another, extend the configuration in [ingestion/ingest.config](ingestion/ingest.config). The ingestion script will process this data and import it into the Data Recipes AI database. This works for simple APIs with relatively low data volumes, and may need some adjustment depending on the complexity of the API.

## API interaction without ingestion

Some APIs are too extensive to ingest. These can be defined as tools (functions) for the assistant, which can query the API on request to get data. See [assistants/recipes_agents/create_update_assistant.py](assistants/recipes_agents/create_update_assistant.py) which already has a couple of functions which integrate with APIs which you could extend for new data sources.

## Files for the assistant

As mentioned in the main [README](README.md), the assistant can be provided your data in the form of data files (eg CSV, Excel) and documents (eg PDF and word). These are available to the assistant for all interactions. Additionally, users can upload files during conversation. In both cases analysis is done by the LLM assistant and should be treated with caution.

# Deployment

We will add more details here soon, for now, here are some notes on Azure ...
Expand Down
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ This repo contains a docker-compose environment that will run the following comp
- (Azure) Open AI Assistant creation tools to create assistants that are aware of the data sources available in the data recipes ai environment
- Autogen studio agent team for helping creating recipes [ In progress ]

# What features are supported?

- Ability to create data recipes using LLMs, these can be served to end users via chat
- Ability for end users to access memories and recipes using a chat interface. Memories will present saved results, recipes will run to get latest results
- Ingestion of openapi standard datasets to a database to enable conversational data analysis using Text-To-SQL
- Ability to provide the assistant documents (eg PDF, DOCX) and datafiles (eg CSV, XLSX) for analysis in all chats
- Ability for user to upload their own documents for analysis
- Attribution and footers to indicate where caution is needed on LLM analysis, versus a recipe a user created

# Quick start

1. Install Docker if you don't have it already, see [here](https://www.docker.com/products/docker-desktop/)
Expand Down Expand Up @@ -89,6 +98,8 @@ This repo contains a docker-compose environment that will run the following comp
ASSISTANTS_BOT_NAME=<Your assistant name, eg "Humanitarian AI Assistant">
```
Be aware that lower-power models such as GPT-3.5-Turbo can serve recipes and carry out basic chat, but perform poorly for analysis and code generation.
Not needed for quick start, but if you want to run ingestion of data with the new HDX API, then you will need to set ...
`HAPI_API_TOKEN=<See https://hdx-hapi.readthedocs.io/en/latest/getting-started/>`
Expand All @@ -113,7 +124,7 @@ This repo contains a docker-compose environment that will run the following comp
Make note of the assistant ID, then edit your `.env` file and use it to set the variable `ASSISTANTS_ID`.
Note: (i) If you rerun `create_update_assistant.py` once `ASSISTANTS_ID` is set, the script will update the assistant rather than create a new one; (ii) You can also add your own data, pdf, docx, csv, xlsx files for the assistant to use, see section 'Adding your own files for the assistant to analyze' below.
Note: (i) If you rerun `create_update_assistant.py` once `ASSISTANTS_ID` is set, the script will update the assistant rather than create a new one. You will need to do this if trying different models; (ii) You can also add your own data, pdf, docx, csv, xlsx files for the assistant to use, see section 'Adding your own files for the assistant to analyze' below.
7. Restart so the assistant ID is set, `docker compose up -d`
Expand All @@ -127,7 +138,6 @@ The steps above are mostly one-time. Going forward you only need to stop and sta
- To start the environment `docker compose up -d`, then go to [http://localhost:8000/](http://localhost:8000/)
- To start with rebuild `docker compose up -d --build` (for more details about development, see [CONTRIBUTION](CONTRIBUTION.md))
## Using Recipes
We are in a phase of research to identify and improve recipes, but for now the system comes with some basic examples to illustrate. To find out the list, enter "Get all recipes" in the chat interface.
Expand Down Expand Up @@ -157,6 +167,14 @@ Note: By default, rerunning the ingestion will not download data if the file alr
`docker compose exec ingestion python ingest.py --force_download`
### Analysis on Uploaded files
As mentioned below, it is possible to add files the assistant can use on its creation. These can be used for all chats.
Additionally, you can upload CSV and Excel files for LLM-powered analysis, as well as documents. Please note though, this is LLM analysis rather than recipes and the results should be treated with caution.
#### Running ingestion without running full environment
If you want to *just* download data and not run the full environment, this is possible as follows:
Expand All @@ -172,6 +190,10 @@ Then run ingestion in download only mode ...
5. `python ingest.py --skip_processing --skip_uploading`
#### Adding new data sources
To add new ingestion data sources, please refer to [CONTRIBUTION](CONTRIBUTION.md)
# Managing recipes
The management of recipes is part of the human in the loop approach of this repo. New recipes are created in status pending and only get marked as approved, once they have been verified by a recipe manager. Recipe managers can 'check out' recipes from the database into their local development environment such as VS Code to run, debug, and edit the recipes, before checking them back in. To make this process platform independent, recipes are checked out into a docker container, which can be used as the runtime environment to run the recipes via VSCode.
Expand Down
3 changes: 2 additions & 1 deletion docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ services:
context: .
dockerfile: ./flows/chainlit-ui-evaluation/Dockerfile
args:
OPENAI_API_KEY: ${OPENAI_API_KEY}
OPENAI_API_KEY: ${ASSISTANTS_API_KEY}
OPENAI_API_ENDPOINT: ${ASSISTANTS_BASE_URL}
OPENAI_API_VERSION: ${ASSISTANTS_API_VERSION}
container_name: recipes-ai-promptflow
env_file:
- .env
Expand Down
5 changes: 4 additions & 1 deletion flows/chainlit-ui-evaluation/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ ENV OPENAI_API_KEY=$OPENAI_API_KEY
ARG OPENAI_API_ENDPOINT
ENV OPENAI_API_ENDPOINT=$OPENAI_API_ENDPOINT

ARG OPENAI_API_VERSION
ENV OPENAI_API_VERSION=$OPENAI_API_VERSION

ARG CHAT_URL
ENV CHAT_URL=$CHAT_URL

Expand Down Expand Up @@ -48,7 +51,7 @@ RUN pip3 install keyrings.alt

# Set up Connections
RUN pf connection create --file ./openai.yaml --set api_key=$OPENAI_API_KEY --name open_ai_connection
RUN pf connection create --file ./azure_openai.yaml --set api_key=$OPENAI_API_KEY --set api_base=$OPENAI_API_ENDPOINT --name azure_open_ai_connection
RUN pf connection create --file ./azure_openai.yaml --set api_version=$OPENAI_API_VERSION --set api_key=$OPENAI_API_KEY --set api_base=$OPENAI_API_ENDPOINT --name azure_open_ai_connection

RUN echo "DEBUG DOCKER"
RUN which python
Expand Down
3 changes: 2 additions & 1 deletion flows/chainlit-ui-evaluation/azure_openai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ name: azure_open_ai_connection
type: azure_open_ai
api_key: "<user-input>"
api_base: "<user-input>"
api_type: "azure"
api_type: "azure"
api_version: "<user-input>"
Loading

0 comments on commit 0ae8e8f

Please sign in to comment.