Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DIG-1836: fix Snyk vulnerabilities #63

Merged
merged 5 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ if [[ -f "initial_setup" ]]; then
fi

cd query_server
gunicorn server:app
gunicorn -k uvicorn.workers.UvicornWorker server:app
65 changes: 31 additions & 34 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from flask import request, Flask
from flask import Flask
import config
import copy
import re
import requests
import connexion
import secrets
import urllib
from authx.auth import get_user_id, get_auth_token
Expand Down Expand Up @@ -34,10 +35,21 @@ def get_service_info():
"version": "0.1.0"
}

def safe_get_request_json(request, name):
    """Return the decoded JSON body of an HTTP response, or raise on failure.

    Despite the parameter name, ``request`` is used as a response object:
    the code reads its ``ok``, ``status_code`` and ``text`` attributes and
    calls its ``json()`` method.

    Args:
        request: Response-like object to decode.
        name: Label for the upstream call; only used in the error message.

    Returns:
        Whatever ``request.json()`` returns.

    Raises:
        Exception: If ``request.ok`` is falsy. The message includes the
            status code and the body text.
    """
    if request.ok:
        return request.json()
    raise Exception(f"Could not get {name} response: {request.status_code} {request.text}")

def safe_get_response_json(response, name):
    """Return the decoded JSON body of ``response``, or raise on failure.

    Args:
        response: Response-like object exposing ``ok``, ``status_code``,
            ``text`` and a ``json()`` method.
        name: Label for the upstream call; only used in the error message.

    Returns:
        Whatever ``response.json()`` returns.

    Raises:
        Exception: If ``response.ok`` is falsy. The message includes the
            status code and the body text.
    """
    if response.ok:
        return response.json()
    raise Exception(f"Could not get {name} response: {response.status_code} {response.text}")


def get_headers():
    """Build the header dict used for outgoing calls to other services.

    Copies every header of the current connexion request, then sets
    ``X-Service-Token`` to ``config.SERVICE_TOKEN`` so that other services
    will know the call is from the query service.

    Returns:
        dict: Incoming header names mapped to their values, plus the
        service token entry.
    """
    incoming = connexion.request.headers
    forwarded = {key: incoming[key] for key in incoming.keys()}
    # Set the token last so it overrides any same-named key copied above.
    forwarded["X-Service-Token"] = config.SERVICE_TOKEN
    return forwarded


# Grab a list of donors matching a given filter from the given URL
def get_donors_from_katsu(url, param_name, parameter_list, headers, therapy_type=None, keep_all=False):
Expand All @@ -52,7 +64,7 @@ def get_donors_from_katsu(url, param_name, parameter_list, headers, therapy_type
if therapy_type != None:
parameters['systemic_therapy_type'] = therapy_type
treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
results = safe_get_response_json(treatments, f'Katsu {param_name}')['items']
permissible_donors |= set([result['submitter_donor_id'] for result in results])
if keep_all:
all_results.extend(results)
Expand All @@ -65,7 +77,7 @@ def get_donors_from_katsu(url, param_name, parameter_list, headers, therapy_type
if therapy_type != None:
parameters['systemic_therapy_type'] = therapy_type
treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
results = safe_get_response_json(treatments, f'Katsu {param_name}')['items']
permissible_donors |= set([result['submitter_donor_id'] for result in results])
all_results.extend(results)
return permissible_donors, all_results
Expand Down Expand Up @@ -132,7 +144,7 @@ def query_htsget_gene(headers, gene_array):
}
}

return safe_get_request_json(requests.post(
return safe_get_response_json(requests.post(
f"{config.HTSGET_URL}/beacon/v2/g_variants",
headers=headers,
json=payload), 'HTSGet Gene')
Expand All @@ -152,7 +164,7 @@ def query_htsget_pos(headers, assembly, chrom, start=0, end=10000000):
}
}

return safe_get_request_json(requests.post(
return safe_get_response_json(requests.post(
f"{config.HTSGET_URL}/beacon/v2/g_variants",
headers=headers,
json=payload), 'HTSGet position')
Expand Down Expand Up @@ -217,21 +229,13 @@ def format_query_response(donors, genomic_query, summary_stats, page, page_size)

@app.route('/query')
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_programs=[], session_id=""):
# Add a service token to the headers so that other services will know this is from the query service:
headers = {}
for k in request.headers.keys():
headers[k] = request.headers[k]
headers["X-Service-Token"] = config.SERVICE_TOKEN
headers = get_headers()

# NB: We're still doing table joins here, which is probably not where we want to do them
# We're grabbing (and storing in memory) all the donor data in Katsu with the below request

# Query the appropriate Katsu endpoint
url = f"{config.KATSU_URL}/v3/authorized/query/"
headers = {}
for k in request.headers.keys():
headers[k] = request.headers[k]
headers["X-Service-Token"] = config.SERVICE_TOKEN

param_mapping = [
(treatment, "treatment_type"),
Expand Down Expand Up @@ -287,7 +291,7 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",

# We need to be able to map specimens, so we'll grab it from Katsu
specimen_query_req = requests.get(f"{config.KATSU_URL}/v3/authorized/sample_registrations/?page_size=10000000", headers=headers)
specimen_query = safe_get_request_json(specimen_query_req, 'Katsu sample registrations')
specimen_query = safe_get_response_json(specimen_query_req, 'Katsu sample registrations')
specimen_mapping = {}
for specimen in specimen_query['items']:
specimen_mapping[specimen['submitter_sample_id']] = (specimen['submitter_donor_id'], specimen['tumour_normal_designation'])
Expand Down Expand Up @@ -348,18 +352,14 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",

@app.route('/genomic_completeness')
def genomic_completeness():
# Add a service token to the headers so that Katsu will know this is from the query service:
headers = {}
for k in request.headers.keys():
headers[k] = request.headers[k]
headers["X-Service-Token"] = config.SERVICE_TOKEN
headers = get_headers()

programs = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs",
programs = safe_get_response_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs",
# Reuse their bearer token
headers=headers), 'HTSGet programs')
retVal = {}
for program_id in programs:
program = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs/{program_id}",
program = safe_get_response_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs/{program_id}",
# Reuse their bearer token
headers=headers), 'HTSGet program statistics')
if program_id not in retVal:
Expand All @@ -371,7 +371,7 @@ def genomic_completeness():
def discovery_programs():
# Grab all programs from Katsu
url = f"{config.KATSU_URL}/v3/discovery/programs/"
r = safe_get_request_json(requests.get(url), 'Katsu sample registrations')
r = safe_get_response_json(requests.get(url), 'Katsu sample registrations')

# Aggregate all of the programs' return values into one value for the entire site
site_summary_stats = {
Expand Down Expand Up @@ -444,10 +444,7 @@ def discovery_programs():
@app.route('/discovery/query')
def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene="", assembly="hg38", exclude_programs=[]):
url = f"{config.KATSU_URL}/v3/explorer/donors/"
headers = {}
for k in request.headers.keys():
headers[k] = request.headers[k]
headers["X-Service-Token"] = config.SERVICE_TOKEN
headers = get_headers()

param_mapping = [
(treatment, "treatment_type"),
Expand All @@ -464,7 +461,7 @@ def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene=
params[param[1]] = param[0]

full_url = f"{url}?{urllib.parse.urlencode(params, doseq=True)}"
donors = safe_get_request_json(requests.get(full_url, headers=headers), 'Katsu explorer donors')
donors = safe_get_response_json(requests.get(full_url, headers=headers), 'Katsu explorer donors')

# Cross reference with HTSGet, if necessary
if gene != "" or chrom != "":
Expand Down Expand Up @@ -526,7 +523,7 @@ def whoami():
# Grab information about the currently logged-in user
logger.debug(config.OPA_URL)
logger.debug(config.AUTHZ)
token = get_auth_token(request)
token = get_auth_token(connexion.request)
headers = {
"Authorization": f"Bearer {token}"
}
Expand All @@ -540,4 +537,4 @@ def whoami():
}
)
logger.debug(response)
return { 'key': get_user_id(request, opa_url = config.OPA_URL) }
return { 'key': get_user_id(connexion.request, opa_url = config.OPA_URL) }
2 changes: 1 addition & 1 deletion query_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
candigv2_logging.logging.initialize()

# Create the application instance
app = connexion.FlaskApp(__name__, specification_dir='./', options={"swagger_url": "/api"})
app = connexion.App(__name__, specification_dir='./')
app.app.config['SECRET_KEY'] = secrets.token_bytes(32)
CORS(app.app)

Expand Down
13 changes: 9 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
Flask==2.2.5
Flask==3.1.0
Flask-Cors==5.0.0
pytest==7.2.0
connexion==2.14.2
pytest==8.3.3
connexion==3.1.0
connexion[swagger-ui]
connexion[flask]
gunicorn>=23.0.0
swagger-ui-bundle==0.0.9
candigv2-authx@git+https://github.com/CanDIG/[email protected]
candigv2-logging@git+https://github.com/CanDIG/[email protected]
uvicorn[standard]==0.30.6
werkzeug>=3.1.0 # not directly required, pinned by Snyk to avoid a vulnerability
zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability
urllib3>=2.2.2 # not directly required, pinned by Snyk to avoid a vulnerability