Skip to content

Commit

Permalink
Merge branch 'main' into feat/automate-stop-words-list-pg
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus committed Dec 19, 2024
2 parents 38252c7 + e92e1b4 commit 7b479c4
Show file tree
Hide file tree
Showing 14 changed files with 619 additions and 16 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/dependabot-auto-approve.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Approves pull requests opened by Dependabot.
# The job condition limits it to the dataforgoodfr/quotaclimat repository.
name: Dependabot auto-approve
on: pull_request

# `gh pr review --approve` needs write access to pull requests.
permissions:
  pull-requests: write

jobs:
  dependabot:
    runs-on: ubuntu-latest
    # Run only when the PR author is Dependabot and the repository matches.
    if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'dataforgoodfr/quotaclimat'
    steps:
      - name: Dependabot metadata
        id: metadata
        # Action refs take the form owner/repo@ref (a single "@").
        uses: dependabot/fetch-metadata@v2
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
      - name: Approve a PR
        run: gh pr review --approve "$PR_URL"
        env:
          PR_URL: ${{github.event.pull_request.html_url}}
          GH_TOKEN: ${{secrets.GITHUB_TOKEN}}
24 changes: 20 additions & 4 deletions .github/workflows/deploy-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,18 @@ jobs:
- name: Push mediatree_import Image
run: docker push --all-tags ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/mediatree_import

- name: Build ingest_to_db image
run: docker build -f Dockerfile_ingest . -t ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/ingest_to_db
- name: Push ingest_to_db Image
run: docker push ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/ingest_to_db
# Not used anymore
# - name: Build ingest_to_db image
# run: docker build -f Dockerfile_ingest . -t ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/ingest_to_db
# - name: Push ingest_to_db Image
# run: docker push ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/ingest_to_db

# Build the s3 image tagged with the project version, alias it as `latest`,
# then push every local tag of the repository so that `latest` is published
# as well (same approach as the mediatree_import push step).
- name: Build s3 image
  run: docker build -f Dockerfile_api_to_s3 . -t ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/s3:${{ env.PROJECT_VERSION }}
- name: Tag s3 latest image
  run: docker tag ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/s3:${{ env.PROJECT_VERSION }} ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/s3:latest
- name: Push s3 Image
  run: docker push --all-tags ${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/s3
- name: update scaleway job definition with version mediatree_import
uses: jawher/[email protected]
env:
Expand All @@ -69,3 +76,12 @@ jobs:
SCW_ZONE: ${{ secrets.SCW_ZONE }}
with:
args: jobs definition update ${{ secrets.SCALEWAY_JOB_IMPORT_ID }} image-uri=${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/mediatree_import:${{ env.PROJECT_VERSION }}
# Updates the Scaleway job definition identified by SCALEWAY_JOB_S3_ID so that
# its image-uri points at the s3 image tagged with the current project version.
# NOTE(review): the `uses:` reference below looks garbled (e-mail obfuscation
# replaced the `owner/repo@version` text) — restore the real action ref.
- name: update scaleway job definition with version s3
uses: jawher/[email protected]
env:
SCW_ACCESS_KEY: ${{ secrets.SCW_ACCESS_KEY }}
SCW_SECRET_KEY: ${{ secrets.SCW_SECRET_KEY }}
SCW_ORGANIZATION_ID: ${{ secrets.SCW_ORGANIZATION_ID }}
SCW_ZONE: ${{ secrets.SCW_ZONE }}
with:
args: jobs definition update ${{ secrets.SCALEWAY_JOB_S3_ID }} image-uri=${{ secrets.CONTAINER_REGISTRY_ENDPOINT }}/s3:${{ env.PROJECT_VERSION }}
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
secrets/pwd_api.txt
secrets/username_api.txt
secrets/*
s3/*
documents-experts/
cc-bio.json
*.xlsx
Expand Down
47 changes: 47 additions & 0 deletions Dockerfile_api_to_s3
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Image that runs quotaclimat/data_processing/mediatree/s3/api_to_s3.py.
# Multi-stage layout adapted from
# https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0

# Build stage: install the dependencies with Poetry into /app/.venv
FROM python:3.12.7 AS builder

ENV VIRTUAL_ENV=/app/.venv

ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

COPY pyproject.toml poetry.lock ./

RUN pip install poetry==1.8.3

RUN poetry install

# The runtime image, used to just run the code provided its virtual environment
FROM python:3.12.7-slim AS runtime

WORKDIR /app

ENV VIRTUAL_ENV=/app/.venv
# Virtualenv binaries come first on PATH so `python` resolves to the venv.
ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONPATH=/app

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# App code is also included through docker-compose
COPY quotaclimat ./quotaclimat
COPY postgres ./postgres
COPY pyproject.toml pyproject.toml
COPY alembic/ ./alembic
COPY alembic.ini ./alembic.ini
COPY transform_program.py ./transform_program.py

# healthcheck
# NOTE(review): docker-compose sets PORT_HS to 5666 for this service — confirm
# which port the healthcheck server really listens on.
EXPOSE 5050

# NOTE(review): docker-entrypoint.sh is copied and made executable, but the
# ENTRYPOINT below starts the Python script directly and never calls it —
# confirm whether the script is still needed in this image.
COPY docker-entrypoint.sh ./docker-entrypoint.sh
RUN chmod +x ./docker-entrypoint.sh


ENTRYPOINT ["python", "quotaclimat/data_processing/mediatree/s3/api_to_s3.py"]
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,16 @@ Program data will not be updated to avoid lock concurrent issues when using `UPD

**With the docker-entrypoint.sh this command is done automatically, so for production uses, you will not have to run this command.**

# Mediatree to S3
As a safety net, we have configured a data pipeline from the Mediatree API to S3 (Scaleway Object Storage).

Environment variables used:
* START_DATE (Unix timestamp, same as for the mediatree service)
* CHANNEL (same as for the mediatree service)
* BUCKET: Scaleway access key
* BUCKET_SECRET: Scaleway secret key
* BUCKET_NAME

## Production monitoring
* Use scaleway
* Use [Ray dashboard] on port 8265
Expand Down
66 changes: 62 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,18 @@ services:
POSTGRES_PORT: 5432
COMPARE_DURATION: "true"
MODIN_ENGINE: ray
MEDIATREE_USER : /run/secrets/username_api
MEDIATREE_PASSWORD: /run/secrets/pwd_api
BUCKET: /run/secrets/bucket
BUCKET_NAME: mediatree
BUCKET_SECRET: /run/secrets/bucket_secret
MODIN_CPUS: 4 # "https://modin.readthedocs.io/en/0.11.0/using_modin.html#reducing-or-limiting-the-resources-modin-can-use"
tty: true # colorize terminal
secrets:
- pwd_api
- username_api
- bucket
- bucket_secret
volumes:
- ./quotaclimat/:/app/quotaclimat/
- ./postgres/:/app/postgres/
Expand Down Expand Up @@ -204,6 +214,50 @@ services:
postgres_db:
condition: service_healthy

# Service built from Dockerfile_api_to_s3.
# Credentials are supplied as Docker secrets; the variables below hold the
# paths of the secret files under /run/secrets.
api_to_s3:
  ports:
    - "5666:5666"
    - "8265:8265"
  build:
    context: ./
    dockerfile: Dockerfile_api_to_s3
  environment:
    ENV: docker  # change me to prod for real cases
    LOGLEVEL: DEBUG  # Change me to info (debug, info, warning, error) to have less log
    PYTHONPATH: /app
    PORT_HS: "5666"  # healthcheck
    HEALTHCHECK_SERVER: "0.0.0.0"
    # SENTRY_DSN: prod_only
    # END_DATE: "2024-02-29" # optional - otherwise end of the month
    # START_DATE: 1727610071 # to test batch import
    CHANNEL: fr3-idf  # to reimport only one channel
    MEDIATREE_USER: /run/secrets/username_api
    MEDIATREE_PASSWORD: /run/secrets/pwd_api
    BUCKET: /run/secrets/bucket
    BUCKET_SECRET: /run/secrets/bucket_secret
    BUCKET_NAME: mediatree
    MEDIATREE_AUTH_URL: https://keywords.mediatree.fr/api/auth/token/
    KEYWORDS_URL: https://keywords.mediatree.fr/api/subtitle/  # https://keywords.mediatree.fr/docs/#api-Subtitle-SubtitleList
    MODIN_ENGINE: ray
    MODIN_CPUS: "6"  # "https://modin.readthedocs.io/en/0.11.0/using_modin.html#reducing-or-limiting-the-resources-modin-can-use"
    MODIN_MEMORY: "12000000000"  # about 12 GB, assuming the unit is bytes — TODO confirm
    RAY_memory_usage_threshold: "0.95"
  volumes:
    - ./quotaclimat/:/app/quotaclimat/
    - ./postgres/:/app/postgres/
    - ./test/:/app/test/
    - ./s3:/app/s3/
  secrets:
    - pwd_api
    - username_api
    - bucket
    - bucket_secret
  depends_on:
    nginxtest:
      condition: service_healthy
    postgres_db:
      condition: service_healthy

metabase:
container_name: metabase_barometre
image: metabase/metabase:latest
Expand All @@ -225,7 +279,11 @@ services:
condition: service_healthy

# File-backed secrets made available to services under /run/secrets/<name>.
# Each secret is declared once; duplicate mapping keys are invalid YAML.
secrets:  # https://docs.docker.com/compose/use-secrets/
  pwd_api:
    file: secrets/pwd_api.txt
  username_api:
    file: secrets/username_api.txt
  bucket:
    file: secrets/scw_bucket.txt
  bucket_secret:
    file: secrets/scw_bucket_secret.txt
Loading

0 comments on commit 7b479c4

Please sign in to comment.