Skip to content

Commit

Permalink
Merge pull request #49 from CanDIG/model_3
Browse files Browse the repository at this point in the history
Merge model_3 into develop
  • Loading branch information
SonQBChau authored Aug 21, 2024
2 parents 48b3eb0 + e06db61 commit 9f13d80
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 53 deletions.
2 changes: 2 additions & 0 deletions query_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

try:
SERVICE_TOKEN = create_service_token()
if DEBUG_MODE:
print(f"SERVICE_TOKEN: {SERVICE_TOKEN}")
except:
logger.error("Could not obtain a service token")
SERVICE_TOKEN = ""
31 changes: 9 additions & 22 deletions query_server/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ paths:
parameters:
- $ref: "#/components/parameters/treatmentParam"
- $ref: "#/components/parameters/primarySiteParam"
- $ref: "#/components/parameters/chemotherapyParam"
- $ref: "#/components/parameters/immunotherapyParam"
- $ref: "#/components/parameters/hormoneTherapyParam"
- $ref: "#/components/parameters/drugNameParam"
- $ref: "#/components/parameters/chrParam"
- $ref: "#/components/parameters/geneParam"
- $ref: "#/components/parameters/assemblyParam"
Expand Down Expand Up @@ -89,9 +87,7 @@ paths:
parameters:
- $ref: "#/components/parameters/treatmentParam"
- $ref: "#/components/parameters/primarySiteParam"
- $ref: "#/components/parameters/chemotherapyParam"
- $ref: "#/components/parameters/immunotherapyParam"
- $ref: "#/components/parameters/hormoneTherapyParam"
- $ref: "#/components/parameters/drugNameParam"
- $ref: "#/components/parameters/chrParam"
- $ref: "#/components/parameters/geneParam"
- $ref: "#/components/parameters/assemblyParam"
Expand Down Expand Up @@ -140,30 +136,21 @@ components:
required: false
schema:
$ref: '#/components/schemas/Fields'
chemotherapyParam:
drugNameParam:
in: query
name: chemotherapy
name: drug_name
style: pipeDelimited
description: A pipe-separated list of chemotherapy treatments to look for
description: A pipe-separated list of drug names to look for
example: FLUOROURACIL|LEUCOVORIN
required: false
schema:
$ref: '#/components/schemas/Fields'
immunotherapyParam:
systemicTherapyTypeParam:
in: query
name: immunotherapy
name: systemic_therapy_type
style: pipeDelimited
description: A pipe-separated list of immunotherapy treatments to look for
example: Necitumumab|Pembrolizumab
required: false
schema:
$ref: '#/components/schemas/Fields'
hormoneTherapyParam:
in: query
name: hormone_therapy
style: pipeDelimited
description: A pipe-separated list of hormone therapy treatments to look for
example: Goserelin|Leuprolide
description: A pipe-separated list of systemic therapies that we accept
example: Chemotherapy|Immunotherapy
required: false
schema:
$ref: '#/components/schemas/Fields'
Expand Down
93 changes: 62 additions & 31 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,43 @@ def safe_get_request_json(request, name):
return request.json()

# Grab a list of donors matching a given filter from the given URL
def get_donors_from_katsu(url, param_name, parameter_list, headers):
def get_donors_from_katsu(url, param_name, parameter_list, headers, therapy_type=None, keep_all=False):
permissible_donors = set()
all_results = []
for parameter in parameter_list:
# TODO: Fix the page_size call here -- use a consume_all() query like in the frontend
parameters = {
param_name: parameter,
'page_size': PAGE_SIZE
}
if therapy_type != None:
parameters['systemic_therapy_type'] = therapy_type
treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
permissible_donors |= set([result['submitter_donor_id'] for result in results])
return permissible_donors
if keep_all:
all_results.extend(results)

# If we are required to return all results, query at least once
if not parameter_list and keep_all:
parameters = {
'page_size': PAGE_SIZE
}
if therapy_type != None:
parameters['systemic_therapy_type'] = therapy_type
treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
permissible_donors |= set([result['submitter_donor_id'] for result in results])
all_results.extend(results)
return permissible_donors, all_results

def add_or_increment(dict, key):
if key in dict:
dict[key] += 1
else:
dict[key] = 1

def get_summary_stats(donors, headers):
def get_summary_stats(donors, primary_diagnoses, headers):
# Perform (and cache) summary statistics
age_at_diagnosis = {}
donors_by_id = {}
Expand All @@ -79,22 +96,23 @@ def get_summary_stats(donors, headers):
else:
add_or_increment(age_at_diagnosis, f'{age}-{age+9} Years')

# Cancer types
if donor['primary_site']:
for cancer_type in donor['primary_site']:
if cancer_type in primary_site_count:
primary_site_count[cancer_type] += 1
else:
primary_site_count[cancer_type] = 1
program_id = donor['program_id']
if program_id in patients_per_cohort:
patients_per_cohort[program_id] += 1
elif program_id is not None:
patients_per_cohort[program_id] = 1

# primary sites
for primary_diagnosis in primary_diagnoses:
primary_site = primary_diagnosis['primary_site']
if primary_site in primary_site_count:
primary_site_count[primary_site] += 1
else:
primary_site_count[primary_site] = 1

# Treatment types
# http://candig.docker.internal:8008/v2/authorized/treatments/
treatments = requests.get(f"{config.KATSU_URL}/v2/authorized/treatments/?page_size={PAGE_SIZE}",
# http://candig.docker.internal:8008/v3/authorized/treatments/
treatments = requests.get(f"{config.KATSU_URL}/v3/authorized/treatments/?page_size={PAGE_SIZE}",
headers=headers)
treatments = safe_get_request_json(treatments, 'Katsu treatments')['items']
treatment_type_count = {}
Expand Down Expand Up @@ -197,7 +215,7 @@ def fix_dicts(to_fix):
return to_fix

@app.route('/query')
def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", hormone_therapy="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
# Add a service token to the headers so that other services will know this is from the query service:
headers = {}
for k in request.headers.keys():
Expand All @@ -209,9 +227,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm

# Query the appropriate Katsu endpoint
params = { 'page_size': PAGE_SIZE }
url = f"{config.KATSU_URL}/v2/authorized/donors/"
if primary_site != "":
params['primary_site'] = primary_site
url = f"{config.KATSU_URL}/v3/authorized/donors/"
r = safe_get_request_json(requests.get(f"{url}?{urllib.parse.urlencode(params, True)}",
# Reuse their bearer token
headers=headers), 'Katsu Donors')
Expand All @@ -222,20 +238,32 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm

# Will need to look into how to go about this -- ideally we implement this into the SQL in Katsu's side
filters = [
(treatment, f"{config.KATSU_URL}/v2/authorized/treatments/", 'treatment_type'),
(chemotherapy, f"{config.KATSU_URL}/v2/authorized/chemotherapies/", 'drug_name'),
(immunotherapy, f"{config.KATSU_URL}/v2/authorized/immunotherapies/", 'drug_name'),
(hormone_therapy, f"{config.KATSU_URL}/v2/authorized/hormone_therapies/", 'drug_name')
(treatment, f"{config.KATSU_URL}/v3/authorized/treatments/", 'treatment_type', None)
]
for (this_filter, url, param_name) in filters:
if type(systemic_therapy_type) is list:
for this_type in systemic_therapy_type:
filters.append((drug_name, f"{config.KATSU_URL}/v3/authorized/systemic_therapies/", 'drug_name', this_type))
else:
filters.append((drug_name, f"{config.KATSU_URL}/v3/authorized/systemic_therapies/", 'drug_name', None))

for (this_filter, url, param_name, therapy_type) in filters:
if this_filter != "":
permissible_donors = get_donors_from_katsu(
permissible_donors, _ = get_donors_from_katsu(
url,
param_name,
this_filter,
headers
headers,
therapy_type
)
donors = [donor for donor in donors if donor['submitter_donor_id'] in permissible_donors]
permissible_donors, primary_diagnoses = get_donors_from_katsu(
f"{config.KATSU_URL}/v3/authorized/primary_diagnoses/",
'primary_site',
primary_site if primary_site != "" else [],
headers,
keep_all=True
)
donors = [donor for donor in donors if donor['submitter_donor_id'] in permissible_donors]

# Now we combine this with HTSGet, if any
genomic_query = []
Expand All @@ -245,7 +273,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
htsget = query_htsget(headers, gene, assembly, chrom)

# We need to be able to map specimens, so we'll grab it from Katsu
specimen_query_req = requests.get(f"{config.KATSU_URL}/v2/authorized/sample_registrations/?page_size=10000000", headers=headers)
specimen_query_req = requests.get(f"{config.KATSU_URL}/v3/authorized/sample_registrations/?page_size=10000000", headers=headers)
specimen_query = safe_get_request_json(specimen_query_req, 'Katsu sample registrations')
specimen_mapping = {}
for specimen in specimen_query['items']:
Expand Down Expand Up @@ -299,8 +327,12 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
except Exception as ex:
logger.error(f"Error while reading HTSGet response: {ex}")

# We also need to cut down the list of primary diagnoses based on the filtered list of donors at this point
permissible_donors = set([donor['submitter_donor_id'] for donor in donors])
primary_diagnoses = [primary_diagnosis for primary_diagnosis in primary_diagnoses if primary_diagnosis['submitter_donor_id'] in permissible_donors]

# TODO: Cache the above list of donor IDs and summary statistics
summary_stats = get_summary_stats(donors, headers)
summary_stats = get_summary_stats(donors, primary_diagnoses, headers)

# Determine which part of the filtered donors to send back
full_data = {
Expand Down Expand Up @@ -345,7 +377,7 @@ def genomic_completeness():
@app.route('/discovery/programs')
def discovery_programs():
# Grab all programs from Katsu
url = f"{config.KATSU_URL}/v2/discovery/programs/"
url = f"{config.KATSU_URL}/v3/discovery/programs/"
r = safe_get_request_json(requests.get(url), 'Katsu sample registrations')

# Aggregate all of the programs' return values into one value for the entire site
Expand Down Expand Up @@ -417,8 +449,8 @@ def discovery_programs():
return fix_dicts(ret_val), 200

@app.route('/discovery/query')
def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherapy="", hormone_therapy="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
url = f"{config.KATSU_URL}/v2/explorer/donors/"
def discovery_query(treatment="", primary_site="", systemic_therapy="", systemic_therapy_type="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
url = f"{config.KATSU_URL}/v3/explorer/donors/"
headers = {}
for k in request.headers.keys():
headers[k] = request.headers[k]
Expand All @@ -427,9 +459,8 @@ def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherap
param_mapping = [
(treatment, "treatment_type"),
(primary_site, "primary_site"),
(chemotherapy, "chemotherapy_drug_name"),
(immunotherapy, "immunotherapy_drug_name"),
(hormone_therapy, "hormone_therapy_drug_name"),
(systemic_therapy, "systemic_therapy_drug_name"),
(systemic_therapy_type, "systemic_therapy_type"),
(exclude_cohorts, "exclude_cohorts")
]
params = {}
Expand Down

0 comments on commit 9f13d80

Please sign in to comment.