From adcae925ae8128e29273baa6022d2bf64a705589 Mon Sep 17 00:00:00 2001 From: Brittny Lapierre Date: Wed, 23 Oct 2024 17:42:04 -0400 Subject: [PATCH] Supporting the endpoinds for see also urls --- requirements.txt | 4 +- server.py | 148 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 109 insertions(+), 43 deletions(-) diff --git a/requirements.txt b/requirements.txt index f1db66f..f4d6ea8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,6 @@ typing_extensions==4.12.2 python-multipart==0.0.12 fastapi[standard] fastapi-sso -python-jose[cryptography] \ No newline at end of file +python-jose[cryptography] +boto3 +botocore \ No newline at end of file diff --git a/server.py b/server.py index e0c3453..22c5ba4 100644 --- a/server.py +++ b/server.py @@ -5,8 +5,8 @@ from PIL import Image from PIL.ExifTags import TAGS from typing import List -from fastapi import File, FastAPI, Depends, HTTPException, Security, Request, Header -from fastapi.responses import HTMLResponse, RedirectResponse +from fastapi import File, FastAPI, Depends, HTTPException, Security, Request, Header, Response +from fastapi.responses import HTMLResponse, RedirectResponse, StreamingResponse from typing_extensions import Annotated import requests from dotenv import load_dotenv @@ -18,7 +18,8 @@ from fastapi.security import APIKeyCookie # this is the part that puts the lock icon to the from fastapi_sso.sso.base import OpenID from jose import jwt # pip install python-jose[cryptography] - +import boto3 +import botocore # https://stackoverflow.com/questions/45244998/azure-ad-authentication-python-web-api load_dotenv(".env") @@ -32,10 +33,17 @@ AAD_TOKEN_URL=os.getenv('AAD_TOKEN_URL') NOID_SERVER=os.getenv('NOID_SERVER') + SWIFT_AUTH_URL=os.getenv('SWIFT_AUTH_URL') SWIFT_USERNAME=os.getenv('SWIFT_USERNAME') SWIFT_PASSWORD=os.getenv('SWIFT_PASSWORD') SWIFT_PREAUTH_URL=os.getenv('SWIFT_PREAUTH_URL') + +S3SOURCE_ENDPOINT=os.getenv('S3SOURCE_ENDPOINT') +S3SOURCE_ACCESS_KEY_ID=os.getenv('S3SOURCE_ACCESS_KEY_ID') +S3SOURCE_SECRET_KEY=os.getenv('S3SOURCE_SECRET_KEY') +S3SOURCE_ACCESSFILES_BUCKET_NAME=os.getenv('S3SOURCE_ACCESSFILES_BUCKET_NAME') + COUCHDB_USER=os.getenv('COUCHDB_USER') COUCHDB_PASSWORD=os.getenv('COUCHDB_PASSWORD') COUCHDB_URL=os.getenv('COUCHDB_URL') @@ -48,6 +56,16 @@ key=SWIFT_PASSWORD, preauthurl=SWIFT_PREAUTH_URL) +s3_conn = boto3.client( + service_name='s3', + aws_access_key_id=S3SOURCE_ACCESS_KEY_ID, + aws_secret_access_key=S3SOURCE_SECRET_KEY, + endpoint_url=S3SOURCE_ENDPOINT, + # The next option is only required because my provider only offers "version 2" + # authentication protocol. Otherwise this would be 's3v4' (the default, version 4). + config=botocore.client.Config(signature_version='s3') +) + # TODO - couch openvpn makes request too slow #couch = couchdb.Server('http://'+COUCHDB_USER+':'+COUCHDB_PASSWORD+'@'+COUCHDB_URL+'/') #db_access = couch['access'] @@ -84,6 +102,17 @@ def save_image_to_swift(local_filename, swift_filename, container): except: return None +def get_file_from_swift(swift_filename, container): + try: + resp_headers, obj_contents = conn.get_object(container, swift_filename) + print('got', type(obj_contents)) + print(resp_headers) + return resp_headers, obj_contents + except: + print(swift_filename, ' not found') + return None, None + + ''' def save_canvas(noid, encoded_noid, width, height, size, md5): #canvas = db_canvas.get(canvas['id']) @@ -122,38 +151,30 @@ async def get_logged_user(cookie: str = Security(APIKeyCookie(name="token"))) -> raise HTTPException(status_code=401, detail="Invalid authentication credentials") from error def verify_token(req: Request): - token = req.headers["Authorization"] - print("token", token) - # Here your code for verifying the token or whatever you use - valid = jwt.decode(token.replace("Bearer ", ""), key=AAD_CLIENT_SECRET, algorithms=["HS256"]) - print("res", valid) - if not valid: - raise HTTPException( - status_code=401, - detail="Unauthorized" - ) - return True + try: + token = req.headers["Authorization"] + print("token", token) + # Here your code for verifying the token or whatever you use + valid = jwt.decode(token.replace("Bearer ", ""), key=AAD_CLIENT_SECRET, algorithms=["HS256"]) + print("res", valid) + if not valid: + raise HTTPException( + status_code=401, + detail="Unauthorized" + ) + return True + except Exception as error: + print("An error occurred:", error) + return False -app = FastAPI() -@app.get("/bearer-protected") -async def protected_endpoint(authorized: bool = Depends(verify_token)): - message = { - "message": f"You are not authroized!", - } - if authorized: - message = { - "message": f"You are authroized!", - } - return message +app = FastAPI() -@app.get("/protected") -async def protected_endpoint(user: OpenID = Depends(get_logged_user)): - """This endpoint will say hello to the logged user. - If the user is not logged, it will return a 401 error from `get_logged_user`.""" - return { - "message": f"You are very welcome, {user.email}!", - } +@app.get("/") +async def main(cookie: str = Security(APIKeyCookie(name="token"))): + return { + "token": cookie, + } @app.get("/auth/login") @@ -188,9 +209,29 @@ async def login_callback(request: Request): ) # This cookie will make sure /protected knows the user return response +@app.get("/ocr/{prefix}/{noid}") +async def ocr(prefix, noid): + resp_headers, obj_contents = get_file_from_swift(prefix + "/" + noid + "/" + 'ocrTXTMAP.xml', 'access-metadata') + if obj_contents == None: + resp_headers, obj_contents = get_file_from_swift(prefix + "/" + noid + "/" + 'ocrALTO.xml', 'access-metadata') + return Response(content=obj_contents, media_type=resp_headers['content-type']) + +@app.get("/pdf/{prefix}/{noid}") +async def pdf(prefix, noid): + try: + result = s3_conn.get_object(Bucket='access-files', Key=prefix+"/"+ noid+".pdf") + return StreamingResponse(content=result["Body"].iter_chunks()) + except Exception as e: + if hasattr(e, "message"): + raise HTTPException( + status_code=e.message["response"]["Error"]["Code"], + detail=e.message["response"]["Error"]["Message"], + ) + else: + raise HTTPException(status_code=500, detail=str(e)) @app.post("/uploadfiles") -async def create_files(files: Annotated[List[bytes], File()], user: OpenID = Depends(get_logged_user)): +async def create_files(files: Annotated[List[bytes], File()], authorized: bool = Depends(verify_token)): canvases = [] # if form ! have manifest noid min noid else use noid... manifest_noid = mint_noid("manifest") @@ -243,9 +284,13 @@ async def create_files(files: Annotated[List[bytes], File()], user: OpenID = Dep } ] } + ], + "seeAlso" : [ + ] }) return {"canvases": canvases} + ''' Example Canvas Create Res: { "canvases": [ @@ -293,19 +338,36 @@ async def create_files(files: Annotated[List[bytes], File()], user: OpenID = Dep } ''' +''' Authorized Examples +@app.get("/bearer-protected") +async def protected_endpoint(authorized: bool = Depends(verify_token)): + message = { + "message": f"You are not authroized!", + } + if authorized: + message = { + "message": f"You are authroized!", + } + return message +''' +''' +@app.get("/protected") +async def protected_endpoint(user: OpenID = Depends(get_logged_user)): + """This endpoint will say hello to the logged user. + If the user is not logged, it will return a 401 error from `get_logged_user`.""" + return { + "message": f"You are very welcome, {user.email}!", + } +''' + -@app.get("/") -async def main(cookie: str = Security(APIKeyCookie(name="token"))): - return { - "token": cookie, - } ''' TODOS: -- Update this site on Azure (python now - will need to make a new app: crkn-access-api) -- Edit the front end app to use this site for auth instead of access.canadiana.ca -- CREATE ROUTE TO SERVE (non-image) files from Swift: access-files [pdf] and access-metadata [alto] +X - Edit the front end app to use this site for auth instead of access.canadiana.ca + +X - CREATE ROUTE TO SERVE (non-image) files from Swift: access-files [pdf] and access-metadata [alto] - Attach OCR Alto to Canvases Mirador text overlay: @@ -322,11 +384,13 @@ async def main(cookie: str = Security(APIKeyCookie(name="token"))): call this route from the front end app - OCR PDF changes: - Send urls to Mary API instead of couch + Change call for updating canvas in couch - file is replace with same name so we don't need to worry about updating IIIF) - Hammer changes: Use Mary API for Canvas and Manifest level data +- Update this site on Azure (python now - will need to make a new app: crkn-access-api) + - After migration to IIIF scripts complete: 1. No need to use the access.canadiana.ca metadata tab - just index using blacklight (hammer [the script used to add data to CAP] will get the metadata from this) 2. Once Hugo replaces WIP and releases his new OCR scripting, no need to use access.canadiana.ca OCR tab