Skip to content

Commit

Permalink
feat: add ./splat_cli helper script
Browse files Browse the repository at this point in the history
  • Loading branch information
uptickmetachu committed Apr 18, 2024
1 parent b771ca9 commit 4d3f3fc
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 40 deletions.
26 changes: 14 additions & 12 deletions lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@
import tempfile
import uuid
import xml.etree.ElementTree as ET
from collections.abc import Iterator
from contextlib import contextmanager
from dataclasses import dataclass, field
from urllib.parse import urlparse

import boto3
import playwright
import playwright.sync_api
import pydantic
import requests
import sentry_sdk
Expand Down Expand Up @@ -90,7 +94,8 @@ def init() -> None:
os.environ["FONTCONFIG_PATH"] = "/var/task/fonts"


def playwright_page_to_pdf(browser_url: str, headers: dict, output_filepath: str) -> None:
@contextmanager
def _playwright_visit_page(browser_url: str, headers: dict) -> Iterator[playwright.sync_api.Page]:
print("splat|playwright_handler|url=", browser_url)
with sync_playwright() as p:
browser = p.chromium.launch()
Expand All @@ -102,21 +107,18 @@ def playwright_page_to_pdf(browser_url: str, headers: dict, output_filepath: str
wait_until="domcontentloaded",
)
page.emulate_media(media="print")
page.wait_for_load_state("domcontentloaded")
page.wait_for_load_state("networkidle")
yield page


def playwright_page_to_pdf(browser_url: str, headers: dict, output_filepath: str) -> None:
    """Save the page at ``browser_url`` to ``output_filepath`` as an A4 PDF.

    The page is obtained from ``_playwright_visit_page``, which receives
    ``browser_url`` and ``headers`` unchanged.
    """
    visit = _playwright_visit_page(browser_url, headers)
    with visit as loaded_page:
        loaded_page.pdf(format="A4", path=output_filepath)


def playwright_page_to_html_string(browser_url: str, headers: dict) -> str:
print("splat|playwright_handler|url=", browser_url)
with sync_playwright() as p:
browser = p.chromium.launch()
context = browser.new_context()
context.set_extra_http_headers(headers)
page = context.new_page()
page.goto(
browser_url,
wait_until="domcontentloaded",
)
page.emulate_media(media="print")
with _playwright_visit_page(browser_url, headers) as page:
return page.content()


Expand Down
28 changes: 0 additions & 28 deletions scripts/local.py

This file was deleted.

67 changes: 67 additions & 0 deletions splat_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env python
# usage: ./splat_cli.py --open -o /tmp/google.pdf -b https://google.com
import argparse
import base64
import json
import pathlib

import requests

DEFAULT_LAMBDA_URL = "http://localhost:8080/2015-03-31/functions/function/invocations"

# Command-line interface for invoking splat.
# NOTE: the parser is built and sys.argv is parsed at module level, so merely
# importing this module triggers argument parsing.
parser = argparse.ArgumentParser(
description="Run against splat locally. Sample usage: ./splat_cli.py --open -o /tmp/google.pdf -b https://google.com"
)
# Document sources. The __main__ block forwards only the first one supplied,
# checking document content, then document URL, then browser URL.
parser.add_argument("--document-content", "-c", help="The content of the document")
parser.add_argument("--document-url", "-u", help="The URL of the document")
parser.add_argument("--browser-url", "-b", help="Use a playwright to browse to the url")
parser.add_argument("--renderer", "-r", help="The renderer to use", default="princexml")
# The output path is the only required option.
parser.add_argument("--output-path", "-o", help="The path to save the output PDF", required=True)
parser.add_argument("--open", help="Open the resulting pdf", default=False, action="store_true")
parser.add_argument(
"--lambda-url",
help="Lambda URL to receive the payload body. Defaults to local dev setup.",
default=DEFAULT_LAMBDA_URL,
)

args = parser.parse_args()

# Unpack the parsed options into module-level names: call_lamdba reads
# lambda_url, and the __main__ block reads the remaining values.
document_content = args.document_content
document_url = args.document_url
browser_url = args.browser_url
renderer = args.renderer
output_path = args.output_path
lambda_url = args.lambda_url


def call_lamdba(body: dict, raise_exception: bool = True) -> tuple[int, dict, bytes]:
    """Invoke the Lambda endpoint with ``body`` and unpack its response.

    The payload is POSTed to the module-level ``lambda_url`` as
    ``{"body": <JSON-encoded body>}`` with a 60 second timeout.

    Args:
        body: Request payload; JSON-encoded into the event's ``body`` field.
        raise_exception: When true, raise on an HTTP error status from the
            endpoint, and on a non-base64 response whose ``statusCode`` is
            neither 200 nor 201.

    Returns:
        A ``(status_code, json_body, raw_bytes)`` tuple. A base64-encoded
        response yields ``(status_code, {}, decoded_bytes)``; any other
        response yields ``(status_code, parsed_body_or_empty_dict, b"")``.

    Raises:
        requests.HTTPError: If ``raise_exception`` is true and the HTTP
            request itself returned an error status.
        Exception: If ``raise_exception`` is true and a non-base64 response
            carries a ``statusCode`` other than 200/201; the parsed response
            body is the exception argument.
    """
    response = requests.post(lambda_url, json={"body": json.dumps(body)}, timeout=60)
    if raise_exception:
        response.raise_for_status()

    data = response.json()
    status_code = data["statusCode"]

    # ``isBase64Encoded`` is optional in a proxy-style response; treat a
    # missing flag as "not encoded" instead of failing with a KeyError.
    if data.get("isBase64Encoded", False):
        return status_code, {}, base64.b64decode(data["body"])

    # Use a separate name so the request payload parameter is not rebound.
    raw_body = data.get("body")
    response_body = json.loads(raw_body) if raw_body else {}
    if raise_exception and status_code not in {200, 201}:
        raise Exception(response_body)

    return status_code, response_body, b""


if __name__ == "__main__":
    # Build the request: the renderer plus at most one document source,
    # taking the first one supplied (content, then URL, then browser URL).
    body = {"renderer": renderer}
    if document_content:
        body["document_content"] = document_content
    elif document_url:
        body["document_url"] = document_url
    elif browser_url:
        body["browser_url"] = browser_url

    # Only the raw bytes of the response are written to the output path.
    _, _, pdf_bytes = call_lamdba(body)
    pathlib.Path(output_path).write_bytes(pdf_bytes)

    if args.open:
        import subprocess

        # Pass the path as its own argv entry rather than interpolating it
        # into a shell command line, so paths containing spaces or shell
        # metacharacters are opened verbatim and cannot inject commands.
        # The exit status is ignored, as it was before.
        subprocess.run(["open", output_path], check=False)  # noqa

0 comments on commit 4d3f3fc

Please sign in to comment.