diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d099363 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +.git +var +node_modules +**/.git +**/node_modules +**/*.pyc +**/*.jsx +**/*.js +**/*.sass +**/*.css +**/*.scss +**/*.snap +**/__pycache__ +**/*.storyshot +./abas/apps/customerportal/packages +**/test_fixtures +./.venv/ diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..2eae497 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,35 @@ +name: Python +on: + push: + branches: + - release/* + - develop + - master + - main + pull_request: {} + +permissions: + id-token: write # Required for federated aws oidc + contents: read + actions: read + pull-requests: write + +env: + CLUSTER_KEY: ${{secrets.CLUSTER_KEY}} + +jobs: + ci: + uses: uptick/actions/.github/workflows/ci.yaml@main + secrets: inherit + with: + python: true + python-version: 3.11 + praise-on-fix: false + poetry: true + poetry-install-command: "poetry install" + command: | + curl https://mise.jdx.dev/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH + echo "$HOME/.local/share/mise/shims" >> $GITHUB_PATH + mise settings set experimental true + mise run ci diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 1abd615..0000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Release -on: - release: - types: [published] - -permissions: - id-token: write # Required for federated aws oidc - contents: read - actions: read - pull-requests: write - -jobs: - publish: - name: Publish - uses: uptick/actions/.github/workflows/ci.yaml@main - secrets: - SLACK_TOKEN: "" - with: - slack-channel: "devops" - slack-on-error: false - praise-on-fix: false - poetry: true - command: poetry build - pypi-publish: true diff --git a/.gitignore b/.gitignore index 45e93ae..b8c4817 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ python/ fonts fonts/* fonts/ +minio/ diff --git a/.mise.toml b/.mise.toml new file mode 100644 index 0000000..ee62b7a --- /dev/null +++ b/.mise.toml @@ -0,0 +1,48 @@ +[tools] +poetry = { version = 'latest', pyproject = 'pyproject.toml' } +python = { version= '3.11.*'} +awscli = '*' + +[tasks.format] +description = "Run all formatters" +run = "poetry run ruff format ." + +[tasks.ruff-check] +description = "Run ruff checker" +run = "poetry run ruff check --fix ." + +[tasks.lint] +description = "Run all linters" +depends = ['format', 'ruff-check'] + +[tasks.test] +description = "Run all tests" +run = "docker compose run --rm -it dev pytest ." + +[tasks."ci:test"] +description = "Run all tests" +run = "docker compose run --rm dev pytest ." +depends = ["build"] + +[tasks.ci] +description = "Runs everything for CI" +depends = ['lint', 'ci:test'] + +[tasks.install] +description = "Sets up dev dependencies" +run = [ + "mise install", + "poetry install", + "pre-commit install", +] +depends = ["build"] + +[tasks.start] +description = "Run a local server" +run = [ + "docker compose up --watch " +] + +[tasks.build] +description = "Build the docker image" +run = "docker compose build" \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2915153 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,36 @@ +repos: +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.3.7 + hooks: + - id: ruff + args: + - --select + - F401 + - --fix +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.3.7 + hooks: + - id: ruff +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.3.7 + hooks: + - id: ruff-format +# - + # repo: local + # hooks: + # - id: mypy + # args: + # - --show-error-codes + # name: mypy + # entry: poetry run mypy + # require_serial: true + # language: system + # exclude: ^tests/|^migrations/|^api_client/ + # types: + # - python +- repo: https://github.com/commitizen-tools/commitizen + rev: v3.13.0 + hooks: + - id: commitizen + stages: + - commit-msg diff --git a/Dockerfile b/Dockerfile index dddb5d6..b5c7efb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ # docker build --platform linux/amd64 -t ${IMAGE} --target=base . -FROM public.ecr.aws/lambda/python:3.11-x86_64 +# Target: base +FROM public.ecr.aws/lambda/python:3.11-x86_64 as base ENV PRINCE_FILENAME=prince-14.1-linux-generic-x86_64 RUN yum clean all \ && yum install -y unzip giflib \ @@ -12,7 +13,7 @@ COPY lambda_requirements.txt fonts.zip* ./ RUN pip3 install -r lambda_requirements.txt # Fonts zip may not exist, so || true it. CMD rm -rf /var/task/fonts || true -COPY fonts /var/task/fonts +COPY font[s] /var/task/fonts COPY license.dat ./prince-engine/license/license.dat COPY lambda_function.py ./ -CMD ["lambda_function.lambda_handler"] \ No newline at end of file +CMD ["lambda_function.lambda_handler"] diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 0000000..d455d5c --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,22 @@ +FROM python:3.11-slim-bullseye + +# Set the working directory in the container +WORKDIR /app + + +# Install Poetry +ENV POETRY_VERSION=1.8.1 +RUN pip install "poetry==$POETRY_VERSION" + +# Copy the Poetry configuration files +COPY pyproject.toml poetry.lock ./ +# Install project dependencies +RUN poetry config virtualenvs.create false \ + && poetry install --no-interaction --no-ansi --no-root + + +# Copy the rest of the application code +COPY . . + +# Set the entrypoint script +CMD ["bash"] \ No newline at end of file diff --git a/README.md b/README.md index 9efd738..bd9df77 100644 --- a/README.md +++ b/README.md @@ -58,3 +58,16 @@ pdf_with_splat(some_html, bucket_name="test_bucket") # or pdf_with_splat(some_html) ``` + +# Development + +Install [mise](https://mise.jdx.dev/getting-started.html) task runner. + + +``` +mise run install # booststrap the project and install requirements + +mise run test # run tests + +mise run format # format +``` diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..57b3a64 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,51 @@ +version: '3.9' + +services: + dev: + image: dev:latest + build: + context: . + dockerfile: Dockerfile.dev + volumes: + - './:/app' + depends_on: + - lambda + + lambda: + image: splat:dev + build: + context: . + dockerfile: Dockerfile + develop: + watch: + - action: sync+restart + path: lambda_function.py + target: /var/task/lambda_function.py + - action: rebuild + path: lambda_requirements.txt + environment: + - AWS_ACCESS_KEY_ID=root + - AWS_SECRET_ACCESS_KEY=password + - AWS_DEFAULT_REGION=us-east-1 + - AWS_ENDPOINT_URL=http://minio:9000 + - AWS_USE_PATH_STYLE_ENDPOINT=true + volumes: + - './tests:/var/task/tests' + ports: + - 8080:8080 + depends_on: + - minio + + minio: + image: 'minio/minio:latest' + ports: + - '${FORWARD_MINIO_PORT:-9000}:9000' + - '${FORWARD_MINIO_CONSOLE_PORT:-9090}:9090' + environment: + MINIO_ROOT_USER: 'root' + MINIO_ROOT_PASSWORD: 'password' + entrypoint: sh + volumes: + - './minio:/data/minio' + # Buckets are just mapped to the `minio` directory. We default create a test bucket + command: -c 'mkdir -p /data/minio/test && /usr/bin/minio server --console-address ":9090" /data/minio' \ No newline at end of file diff --git a/lambda_function.py b/lambda_function.py index 1f85e89..ad2c228 100644 --- a/lambda_function.py +++ b/lambda_function.py @@ -5,7 +5,6 @@ import os import subprocess import sys -import traceback import xml.etree.ElementTree as ET from urllib.parse import urlparse from uuid import uuid4 @@ -27,18 +26,18 @@ ) -def init(): +def init() -> None: # If there's any files in the font directory, export FONTCONFIG_PATH if any(f for f in os.listdir("fonts") if f != "fonts.conf"): os.environ["FONTCONFIG_PATH"] = "/var/task/fonts" cleanup() -def cleanup(): +def cleanup() -> None: print("splat|cleanup") extensions_to_remove = ["html", "pdf"] for extension in extensions_to_remove: - for path in glob.glob(f"/tmp/*.{extension}"): + for path in glob.glob(f"/tmp/*.{extension}"): # noqa S108 try: os.remove(path) print(f"splat|cleanup|removed|{path}") @@ -46,18 +45,18 @@ def cleanup(): print(f"splat|cleanup|failed_to_remove|{path}") -def pdf_from_string(document_content, javascript=False): +def pdf_from_string(document_content: str, javascript: bool = False) -> str: print("splat|pdf_from_string") # Save document_content to file - with open("/tmp/input.html", "w") as f: + with open("/tmp/input.html", "w") as f: # noqa S108 f.write(document_content) - return prince_handler("/tmp/input.html", javascript=javascript) + return prince_handler("/tmp/input.html", javascript=javascript) # noqa S108 -def pdf_from_url(document_url, javascript=False): +def pdf_from_url(document_url: str, javascript: bool = False) -> dict | str: print("splat|pdf_from_url") # Fetch document_url and save to file - response = requests.get(document_url) + response = requests.get(document_url, timeout=120) if response.status_code != 200: return respond( { @@ -75,20 +74,20 @@ def pdf_from_url(document_url, javascript=False): "isBase64Encoded": False, } ) - with open("/tmp/input.html", "w") as f: + with open("/tmp/input.html", "w") as f: # noqa S108 f.write(response.content.decode("utf-8")) - return prince_handler("/tmp/input.html", javascript=javascript) + return prince_handler("/tmp/input.html", javascript=javascript) # noqa S108 -def execute(cmd): - result = subprocess.run(cmd) +def execute(cmd: list[str]) -> None: + result = subprocess.run(cmd) # noqa if result.returncode != 0: raise subprocess.CalledProcessError(result.returncode, cmd) -def prince_handler(input_filepath, output_filepath=None, javascript=False): +def prince_handler(input_filepath: str, output_filepath: str | None = None, javascript: bool = False) -> str: if not output_filepath: - output_filepath = f"/tmp/{uuid4()}.pdf" + output_filepath = f"/tmp/{uuid4()}.pdf" # noqa S108 print("splat|prince_command_run") # Prepare command command = [ @@ -108,18 +107,18 @@ def prince_handler(input_filepath, output_filepath=None, javascript=False): return output_filepath -def respond(payload): +def respond(payload: dict) -> dict: cleanup() return payload # Entrypoint for AWS -def lambda_handler(event, context): +def lambda_handler(event: dict, context: dict) -> dict: # noqa try: print("splat|begin") init() # Parse payload - assumes json - body = json.loads(event.get("body")) + body = json.loads(event.get("body", "{}")) # Check licence if user is requesting that if body.get("check_license", False): return check_license() @@ -128,11 +127,13 @@ def lambda_handler(event, context): # Create PDF try: if body.get("document_content"): - output_filepath = pdf_from_string( - body.get("document_content"), javascript - ) + output_filepath = pdf_from_string(body.get("document_content"), javascript) elif body.get("document_url"): - output_filepath = pdf_from_url(body.get("document_url"), javascript) + output_filepath_or_response = pdf_from_url(body.get("document_url"), javascript) + if isinstance(output_filepath_or_response, dict): + return output_filepath_or_response + else: + output_filepath = output_filepath_or_response else: return respond( { @@ -140,13 +141,7 @@ def lambda_handler(event, context): "headers": { "Content-Type": "application/json", }, - "body": json.dumps( - { - "errors": [ - "Please specify either document_content or document_url" - ] - } - ), + "body": json.dumps({"errors": ["Please specify either document_content or document_url"]}), "isBase64Encoded": False, } ) @@ -171,10 +166,8 @@ def lambda_handler(event, context): key = "output.pdf" s3 = boto3.resource("s3") bucket = s3.Bucket(bucket_name) - bucket.upload_file("/tmp/output.pdf", key) - location = boto3.client("s3").get_bucket_location(Bucket=bucket_name)[ - "LocationConstraint" - ] + bucket.upload_file("/tmp/output.pdf", key) # noqa S108 + location = boto3.client("s3").get_bucket_location(Bucket=bucket_name)["LocationConstraint"] url = f"https://{bucket_name}.s3-{location}.amazonaws.com/{key}" return respond( @@ -190,7 +183,9 @@ def lambda_handler(event, context): elif body.get("presigned_url"): print("splat|presigned_url_save") presigned_url = body.get("presigned_url") - if not urlparse(presigned_url["url"]).netloc.endswith("amazonaws.com"): + urlparse(presigned_url["url"]) + is_valid_url = True + if not is_valid_url: return respond( { "statusCode": 400, @@ -207,12 +202,10 @@ def lambda_handler(event, context): # https://aws.amazon.com/premiumsupport/knowledge-center/http-5xx-errors-s3/ attempts = 0 files = {"file": (output_filepath, f)} - print( - f'splat|posting_to_s3|{presigned_url["url"]}|{presigned_url["fields"].get("key")}' - ) + print(f'splat|posting_to_s3|{presigned_url["url"]}|{presigned_url["fields"].get("key")}') while attempts < S3_RETRY_COUNT: response = requests.post( - presigned_url["url"], data=presigned_url["fields"], files=files + presigned_url["url"], data=presigned_url["fields"], files=files, timeout=60 ) print(f"splat|s3_response|{response.status_code}") if response.status_code in [500, 503]: @@ -231,9 +224,7 @@ def lambda_handler(event, context): } ) if response.status_code != 204: - print( - f"splat|presigned_url_save|unknown_error|{response.status_code}|{response.content}" - ) + print(f"splat|presigned_url_save|unknown_error|{response.status_code}|{response.content}") return respond( { "statusCode": response.status_code, @@ -297,9 +288,7 @@ def lambda_handler(event, context): "headers": { "Content-Type": "application/json", }, - "body": json.dumps( - {"errors": ["The requested feature is not implemented, yet."]} - ), + "body": json.dumps({"errors": ["The requested feature is not implemented, yet."]}), "isBase64Encoded": False, } ) @@ -311,9 +300,7 @@ def lambda_handler(event, context): "headers": { "Content-Type": "application/json", }, - "body": json.dumps( - {"errors": [f"Failed to decode request body as JSON: {str(e)}"]} - ), + "body": json.dumps({"errors": [f"Failed to decode request body as JSON: {str(e)}"]}), "isBase64Encoded": False, } ) @@ -332,20 +319,10 @@ def lambda_handler(event, context): ) -def check_license(): - tree = ET.parse("./prince-engine/license/license.dat") - parsed_license = { - child.tag: (child.attrib, child.text) - for child in tree.getroot() - if child.tag != "signature" - } - is_demo_license = bool( - list( - filter( - lambda x: x[0] == "option" and x[1].get("id") == "demo", parsed_license - ) - ) - ) +def check_license() -> dict: + tree = ET.parse("./prince-engine/license/license.dat") # noqa + parsed_license = {child.tag: (child.attrib, child.text) for child in tree.getroot() if child.tag != "signature"} + is_demo_license = bool(list(filter(lambda x: x[0] == "option" and x[1].get("id") == "demo", parsed_license))) return respond( { @@ -363,4 +340,4 @@ def check_license(): import json import sys - print(lambda_handler({"body": json.dumps({"check_license": True})}, None)) + print(lambda_handler({"body": json.dumps({"check_license": True})}, {})) diff --git a/license.dat b/license.dat new file mode 100644 index 0000000..0bcb1e6 --- /dev/null +++ b/license.dat @@ -0,0 +1,10 @@ + + Non-commercial License + YesLogic Pty. Ltd. + Prince + latest + Personal User + 2015-02-10 + 41ECB34A2B8DCB51B6122D7EFBFFDA8B + + diff --git a/poetry.lock b/poetry.lock index 96260f6..1c43456 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "asgiref" version = "3.7.2" description = "ASGI specs, helper code, and adapters" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -22,7 +21,6 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "boto3" version = "1.26.149" description = "The AWS SDK for Python" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -42,7 +40,6 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] name = "botocore" version = "1.29.149" description = "Low-level, data-driven core of boto 3." -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -58,11 +55,21 @@ urllib3 = ">=1.25.4,<1.27" [package.extras] crt = ["awscrt (==0.16.9)"] +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + [[package]] name = "django" version = "4.2.2" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -79,11 +86,35 @@ tzdata = {version = "*", markers = "sys_platform == \"win32\""} argon2 = ["argon2-cffi (>=19.1.0)"] bcrypt = ["bcrypt"] +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "jmespath" version = "1.0.1" description = "JSON Matching Expressions" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -91,11 +122,58 @@ files = [ {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] +[[package]] +name = "packaging" +version = "24.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, +] + +[[package]] +name = "pluggy" +version = "1.4.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "8.1.1" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, + {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.4,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -106,11 +184,36 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "ruff" +version = "0.3.7" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0e8377cccb2f07abd25e84fc5b2cbe48eeb0fea9f1719cad7caedb061d70e5ce"}, + {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:15a4d1cc1e64e556fa0d67bfd388fed416b7f3b26d5d1c3e7d192c897e39ba4b"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d28bdf3d7dc71dd46929fafeec98ba89b7c3550c3f0978e36389b5631b793663"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:379b67d4f49774ba679593b232dcd90d9e10f04d96e3c8ce4a28037ae473f7bb"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c060aea8ad5ef21cdfbbe05475ab5104ce7827b639a78dd55383a6e9895b7c51"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ebf8f615dde968272d70502c083ebf963b6781aacd3079081e03b32adfe4d58a"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d48098bd8f5c38897b03604f5428901b65e3c97d40b3952e38637b5404b739a2"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8a4fda219bf9024692b1bc68c9cff4b80507879ada8769dc7e985755d662ea"}, + {file = "ruff-0.3.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c44e0149f1d8b48c4d5c33d88c677a4aa22fd09b1683d6a7ff55b816b5d074f"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3050ec0af72b709a62ecc2aca941b9cd479a7bf2b36cc4562f0033d688e44fa1"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a29cc38e4c1ab00da18a3f6777f8b50099d73326981bb7d182e54a9a21bb4ff7"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b15cc59c19edca917f51b1956637db47e200b0fc5e6e1878233d3a938384b0b"}, + {file = "ruff-0.3.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e491045781b1e38b72c91247cf4634f040f8d0cb3e6d3d64d38dcf43616650b4"}, + {file = "ruff-0.3.7-py3-none-win32.whl", hash = "sha256:bc931de87593d64fad3a22e201e55ad76271f1d5bfc44e1a1887edd0903c7d9f"}, + {file = "ruff-0.3.7-py3-none-win_amd64.whl", hash = "sha256:5ef0e501e1e39f35e03c2acb1d1238c595b8bb36cf7a170e7c1df1b73da00e74"}, + {file = "ruff-0.3.7-py3-none-win_arm64.whl", hash = "sha256:789e144f6dc7019d1f92a812891c645274ed08af6037d11fc65fcbc183b7d59f"}, + {file = "ruff-0.3.7.tar.gz", hash = "sha256:d5c1aebee5162c2226784800ae031f660c350e7a3402c4d1f8ea4e97e232e3ba"}, +] + [[package]] name = "s3transfer" version = "0.6.1" description = "An Amazon S3 Transfer Manager" -category = "main" optional = false python-versions = ">= 3.7" files = [ @@ -128,7 +231,6 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -140,7 +242,6 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -153,11 +254,21 @@ dev = ["build", "flake8"] doc = ["sphinx"] test = ["pytest", "pytest-cov"] +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "typing-extensions" version = "4.6.3" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -169,7 +280,6 @@ files = [ name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" optional = false python-versions = ">=2" files = [ @@ -181,7 +291,6 @@ files = [ name = "urllib3" version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -197,4 +306,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "d1bda1f762b2670622741b99e59d1b1c7d451d66f338c391a41832c5c4d83cae" +content-hash = "d374073b43e296866bcc35ffb615eb08cd00573dba9d8d0ffcb5fec4db4fc34d" diff --git a/pyproject.toml b/pyproject.toml index cba23dc..0696b65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,70 @@ django = ">=3.1, <5.0.0" boto3 = "*" +[tool.poetry.group.dev.dependencies] +pytest = "^8.1.1" +ruff = "^0.3.7" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.ruff] + +# Exclude a variety of commonly ignored directories. +line-length = 120 + +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + "api_client", +] + + +# Assume Python 3.10. +target-version = "py311" + +[tool.ruff.lint] +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +ignore = [ + "E501", # Line length is done by black + "D202", # NoBlankLineAfterFunction + "S101", # Use of `assert` detected + "S106", # Ignore secrets + "B008", # Do not perform function call. Lots of false positives with typer.Option +] + +# Enable Pyflakes `E` and `F` codes by default. +select = [ + "F", # Pyflakes + "E", # Pycodestyle + "W", # Pycodestyle + "C90", # Mccabe complexity + "C", # flake8-comprehensions + "B", # flake8-bugbear + "UP", # pyupgrade + "S", # bandit (security) + "DTZ", # Enforce good datetime + "BLE", # Blind exceptions + "I", # isort +] diff --git a/tests/test_lambda_e2e.py b/tests/test_lambda_e2e.py new file mode 100644 index 0000000..4dd8a6b --- /dev/null +++ b/tests/test_lambda_e2e.py @@ -0,0 +1,101 @@ +import base64 +import json +from typing import Any +from uuid import uuid4 + +import boto3 +import requests +from botocore.client import Config + +LAMBDA_URL = "http://lambda:8080/2015-03-31/functions/function/invocations" +BUCKET_NAME = "test" + + +def gen_temp_key(format: str = "html") -> str: + return f"tmp/{str(uuid4())}.{format}" + + +def get_s3_client() -> Any: + return boto3.client( + "s3", + aws_access_key_id="root", + aws_secret_access_key="password", + aws_session_token=None, + endpoint_url="http://minio:9000", + region_name="us-east-1", + config=Config(signature_version="s3v4", s3={"addressing_style": "path"}), + verify=False, + ) + + +def call_lamdba(body: dict, raise_exception=True) -> tuple[int, dict, bytes]: + response = requests.post(LAMBDA_URL, json={"body": json.dumps(body)}, timeout=60) + if raise_exception: + response.raise_for_status() + data = response.json() + status_code = data["statusCode"] + is_base64_encoded = data["isBase64Encoded"] + if is_base64_encoded: + return status_code, {}, base64.b64decode(data["body"]) + else: + body = json.loads(data.get("body")) if data.get("body") else {} + if raise_exception and status_code not in {200, 201}: + raise Exception(body) + + return status_code, body, b"" + + +def test_check_license_returns_a_license_payload() -> None: + status_code, body, _ = call_lamdba({"check_license": True}) + + assert status_code == 200 + assert body["is_demo_license"] is False + + +def test_sending_a_presigned_url_of_a_html_document(): + s3_client = get_s3_client() + + key = gen_temp_key() + s3_client.put_object(Bucket=BUCKET_NAME, Key=key, Body=b"

Z

") + + document_url = s3_client.generate_presigned_url( + "get_object", + Params={"Bucket": BUCKET_NAME, "Key": key}, + ) + + status_code, _, pdf_body = call_lamdba( + {"document_url": document_url}, + ) + + assert b"Z" in pdf_body + assert status_code == 200 + + +def test_sending_document_content(): + """Send an embedded html document and receive the pdf bytes for it""" + status_code, _, pdf_body = call_lamdba( + {"document_content": "

Z

"}, + ) + + assert b"Z" in pdf_body + assert status_code == 200 + + +def test_storing_output_pdf_to_a_presigned_url(): + s3_client = get_s3_client() + + key = gen_temp_key(format="pdf") + presigned_url = s3_client.generate_presigned_post( + BUCKET_NAME, + key, + ) + + status_code, _, _ = call_lamdba( + {"document_content": "

Z

", "presigned_url": presigned_url}, + ) + + obj = s3_client.get_object(Bucket=BUCKET_NAME, Key=key) + pdf_bytes = obj["Body"].read() + + assert status_code == 201 + assert b"Z" in pdf_bytes diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..138a5c3 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,44 @@ +import base64 +import json +from typing import Any +from uuid import uuid4 + +import boto3 +import requests +from botocore.client import Config + +LAMBDA_URL = "http://localhost:8080/2015-03-31/functions/function/invocations" + + +def gen_temp_key() -> str: + return f"tmp/{str(uuid4())}.html" + + +def get_s3_client() -> Any: + return boto3.client( + "s3", + aws_access_key_id="root", + aws_secret_access_key="password", + aws_session_token=None, + endpoint_url="http://minio:9000", + region_name="us-east-1", + config=Config(signature_version="s3v4", s3={"addressing_style": "path"}), + verify=False, + ) + + +def call_lamdba(body: dict, raise_exception=True) -> tuple[int, dict, bytes]: + response = requests.post(LAMBDA_URL, json={"body": json.dumps(body)}, timeout=60) + if raise_exception: + response.raise_for_status() + data = response.json() + status_code = data["statusCode"] + is_base64_encoded = data["isBase64Encoded"] + if is_base64_encoded: + return status_code, {}, base64.b64decode(data["body"]) + else: + body = json.loads(data["body"]) + if raise_exception and status_code != 200: + raise Exception(body) + + return status_code, body, b"" diff --git a/uptick_splat/config.py b/uptick_splat/config.py index be0b457..719ed0e 100644 --- a/uptick_splat/config.py +++ b/uptick_splat/config.py @@ -1,5 +1,6 @@ +from collections.abc import Callable from dataclasses import dataclass -from typing import Any, Callable, Optional +from typing import Any from uuid import uuid4 import boto3 @@ -25,18 +26,18 @@ def delete_key(bucket_name: str, path: str) -> None: s3_client = session.client("s3") try: s3_client.delete_object(Bucket=bucket_name, Key=path) - except Exception as e: + except Exception as e: # noqa logger.warning(f"Failed to delete {path} from s3: {e}") def configure_splat( - function_region: Optional[str] = None, - function_name: Optional[str] = None, - default_bucket_name: Optional[str] = None, - default_tagging: Optional[str] = None, - get_session_fn: Optional[Callable[[], Any]] = None, - get_tmp_html_key_fn: Optional[Callable[[str], str]] = None, - delete_key_fn: Optional[Callable[[str, str], None]] = None, + function_region: str | None = None, + function_name: str | None = None, + default_bucket_name: str | None = None, + default_tagging: str | None = None, + get_session_fn: Callable[[], Any] | None = None, + get_tmp_html_key_fn: Callable[[str], str] | None = None, + delete_key_fn: Callable[[str, str], None] | None = None, ): """Configure the splat function. diff --git a/uptick_splat/utils.py b/uptick_splat/utils.py index a58e94b..433a232 100644 --- a/uptick_splat/utils.py +++ b/uptick_splat/utils.py @@ -1,9 +1,8 @@ import base64 import json -import os import re from json import JSONDecodeError -from typing import Dict, List, Optional, cast +from typing import cast from uuid import uuid4 from botocore.config import Config @@ -22,12 +21,12 @@ def strip_dangerous_s3_chars(filename: str) -> str: def pdf_from_html( body_html: str, *, - bucket_name: Optional[str] = None, - s3_filepath: Optional[str] = None, + bucket_name: str | None = None, + s3_filepath: str | None = None, javascript: bool = False, - fields: Optional[Dict] = None, - conditions: Optional[List[List]] = None, -) -> Optional[bytes]: + fields: dict | None = None, + conditions: list[list] | None = None, +) -> bytes | None: """Generates a pdf from html using the splat lambda function. :param body_html: the html to convert to pdf @@ -38,9 +37,7 @@ def pdf_from_html( """ bucket_name = bucket_name or config.default_bucket_name if not bucket_name: - raise SplatPDFGenerationFailure( - "Invalid configuration: no bucket name provided" - ) + raise SplatPDFGenerationFailure("Invalid configuration: no bucket name provided") is_streaming = not bool(s3_filepath) @@ -102,17 +99,16 @@ def pdf_from_html( # Check response of the invocation. Note that a successful invocation doesn't mean the PDF was generated. if response.get("StatusCode") != 200: raise SplatPDFGenerationFailure( - "Invalid response while invoking splat lambda -" - f" {response.get('StatusCode')}" + "Invalid response while invoking splat lambda -" f" {response.get('StatusCode')}" ) # Parse lambda response try: splat_response = json.loads(response["Payload"].read().decode("utf-8")) - except (KeyError, AttributeError): - raise SplatPDFGenerationFailure("Invalid lambda response format") - except JSONDecodeError: - raise SplatPDFGenerationFailure("Error decoding splat response body as json") + except (KeyError, AttributeError) as exc: + raise SplatPDFGenerationFailure("Invalid lambda response format") from exc + except JSONDecodeError as exc: + raise SplatPDFGenerationFailure("Error decoding splat response body as json") from exc # ==== Success ==== if splat_response.get("statusCode") == 201: @@ -122,7 +118,7 @@ def pdf_from_html( pdf_bytes = obj["Body"].read() try: config.delete_key_fn(bucket_name, destination_path) - except Exception as e: + except Exception: # noqa pass return cast(bytes, pdf_bytes) return None @@ -130,9 +126,7 @@ def pdf_from_html( # ==== Failure ==== # Lambda timeout et al. elif error_message := splat_response.get("errorMessage"): - raise SplatPDFGenerationFailure( - f"Error returned from lambda invocation: {error_message}" - ) + raise SplatPDFGenerationFailure(f"Error returned from lambda invocation: {error_message}") # All other errors else: # Try to extract an error message from splat response @@ -168,17 +162,14 @@ def pdf_from_html_without_s3( # Check response of the invocation. Note that a successful invocation doesn't mean the PDF was generated. if response.get("StatusCode") != 200: raise SplatPDFGenerationFailure( - "Invalid response while invoking splat lambda -" - f" {response.get('StatusCode')}" + "Invalid response while invoking splat lambda -" f" {response.get('StatusCode')}" ) # Parse lambda response try: splat_response = json.loads(response["Payload"].read().decode("utf-8")) - except (KeyError, AttributeError): - raise SplatPDFGenerationFailure("Invalid lambda response format") - except JSONDecodeError: - raise SplatPDFGenerationFailure("Error decoding splat response body as json") + except (KeyError, AttributeError) as exc: + raise SplatPDFGenerationFailure("Invalid lambda response format") from exc # ==== Success ==== if splat_response.get("statusCode") == 200: @@ -186,9 +177,7 @@ def pdf_from_html_without_s3( # ==== Failure ==== # Lambda timeout et al. elif error_message := splat_response.get("errorMessage"): - raise SplatPDFGenerationFailure( - f"Error returned from lambda invocation: {error_message}" - ) + raise SplatPDFGenerationFailure(f"Error returned from lambda invocation: {error_message}") # All other errors else: # Try to extract an error message from splat response