Merge pull request #49 from CanDIG/model_3

Merge model_3 into develop
CanDIG · Aug 21, 2024 · 9f13d80 · 9f13d80
2 parents 48b3eb0 + e06db61
commit 9f13d80
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 53 deletions.
diff --git a/query_server/config.py b/query_server/config.py
@@ -25,6 +25,8 @@
 
 try:
     SERVICE_TOKEN = create_service_token()
+    if DEBUG_MODE:
+        print(f"SERVICE_TOKEN: {SERVICE_TOKEN}")
 except:
     logger.error("Could not obtain a service token")
     SERVICE_TOKEN = ""
diff --git a/query_server/openapi.yaml b/query_server/openapi.yaml
@@ -30,9 +30,7 @@ paths:
             parameters:
                 - $ref: "#/components/parameters/treatmentParam"
                 - $ref: "#/components/parameters/primarySiteParam"
-                - $ref: "#/components/parameters/chemotherapyParam"
-                - $ref: "#/components/parameters/immunotherapyParam"
-                - $ref: "#/components/parameters/hormoneTherapyParam"
+                - $ref: "#/components/parameters/drugNameParam"
                 - $ref: "#/components/parameters/chrParam"
                 - $ref: "#/components/parameters/geneParam"
                 - $ref: "#/components/parameters/assemblyParam"
@@ -89,9 +87,7 @@ paths:
             parameters:
                 - $ref: "#/components/parameters/treatmentParam"
                 - $ref: "#/components/parameters/primarySiteParam"
-                - $ref: "#/components/parameters/chemotherapyParam"
-                - $ref: "#/components/parameters/immunotherapyParam"
-                - $ref: "#/components/parameters/hormoneTherapyParam"
+                - $ref: "#/components/parameters/drugNameParam"
                 - $ref: "#/components/parameters/chrParam"
                 - $ref: "#/components/parameters/geneParam"
                 - $ref: "#/components/parameters/assemblyParam"
@@ -140,30 +136,21 @@ components:
             required: false
             schema:
                 $ref: '#/components/schemas/Fields'
-        chemotherapyParam:
+        drugNameParam:
             in: query
-            name: chemotherapy
+            name: drug_name
             style: pipeDelimited
-            description: A pipe-separated list of chemotherapy treatments to look for
+            description: A pipe-separated list of drug names to look for
             example: FLUOROURACIL|LEUCOVORIN
             required: false
             schema:
                 $ref: '#/components/schemas/Fields'
-        immunotherapyParam:
+        systemicTherapyTypeParam:
             in: query
-            name: immunotherapy
+            name: systemic_therapy_type
             style: pipeDelimited
-            description: A pipe-separated list of immunotherapy treatments to look for
-            example: Necitumumab|Pembrolizumab
-            required: false
-            schema:
-                $ref: '#/components/schemas/Fields'
-        hormoneTherapyParam:
-            in: query
-            name: hormone_therapy
-            style: pipeDelimited
-            description: A pipe-separated list of hormone therapy treatments to look for
-            example: Goserelin|Leuprolide
+            description: A pipe-separated list of systemic therapies that we accept
+            example: Chemotherapy|Immunotherapy
             required: false
             schema:
                 $ref: '#/components/schemas/Fields'

diff --git a/query_server/query_operations.py b/query_server/query_operations.py
@@ -40,26 +40,43 @@ def safe_get_request_json(request, name):
     return request.json()
 
 # Grab a list of donors matching a given filter from the given URL
-def get_donors_from_katsu(url, param_name, parameter_list, headers):
+def get_donors_from_katsu(url, param_name, parameter_list, headers, therapy_type=None, keep_all=False):
     permissible_donors = set()
+    all_results = []
     for parameter in parameter_list:
         # TODO: Fix the page_size call here -- use a consume_all() query like in the frontend
         parameters = {
             param_name: parameter,
             'page_size': PAGE_SIZE
         }
+        if therapy_type != None:
+            parameters['systemic_therapy_type'] = therapy_type
         treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
         results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
         permissible_donors |= set([result['submitter_donor_id'] for result in results])
-    return permissible_donors
+        if keep_all:
+            all_results.extend(results)
+
+    # If we are required to return all results, query at least once
+    if not parameter_list and keep_all:
+        parameters = {
+            'page_size': PAGE_SIZE
+        }
+        if therapy_type != None:
+            parameters['systemic_therapy_type'] = therapy_type
+        treatments = requests.get(f"{url}?{urllib.parse.urlencode(parameters)}", headers=headers)
+        results = safe_get_request_json(treatments, f'Katsu {param_name}')['items']
+        permissible_donors |= set([result['submitter_donor_id'] for result in results])
+        all_results.extend(results)
+    return permissible_donors, all_results
 
 def add_or_increment(dict, key):
     if key in dict:
         dict[key] += 1
     else:
         dict[key] = 1
 
-def get_summary_stats(donors, headers):
+def get_summary_stats(donors, primary_diagnoses, headers):
     # Perform (and cache) summary statistics
     age_at_diagnosis = {}
     donors_by_id = {}
@@ -79,22 +96,23 @@ def get_summary_stats(donors, headers):
             else:
                 add_or_increment(age_at_diagnosis, f'{age}-{age+9} Years')
 
-        # Cancer types
-        if donor['primary_site']:
-            for cancer_type in donor['primary_site']:
-                if cancer_type in primary_site_count:
-                    primary_site_count[cancer_type] += 1
-                else:
-                    primary_site_count[cancer_type] = 1
         program_id = donor['program_id']
         if program_id in patients_per_cohort:
             patients_per_cohort[program_id] += 1
         elif program_id is not None:
             patients_per_cohort[program_id] = 1
 
+    # primary sites
+    for primary_diagnosis in primary_diagnoses:
+        primary_site = primary_diagnosis['primary_site']
+        if primary_site in primary_site_count:
+            primary_site_count[primary_site] += 1
+        else:
+            primary_site_count[primary_site] = 1
+
     # Treatment types
-    # http://candig.docker.internal:8008/v2/authorized/treatments/
-    treatments = requests.get(f"{config.KATSU_URL}/v2/authorized/treatments/?page_size={PAGE_SIZE}",
+    # http://candig.docker.internal:8008/v3/authorized/treatments/
+    treatments = requests.get(f"{config.KATSU_URL}/v3/authorized/treatments/?page_size={PAGE_SIZE}",
         headers=headers)
     treatments = safe_get_request_json(treatments, 'Katsu treatments')['items']
     treatment_type_count = {}
@@ -197,7 +215,7 @@ def fix_dicts(to_fix):
         return to_fix
 
 @app.route('/query')
-def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", hormone_therapy="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
+def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
     # Add a service token to the headers so that other services will know this is from the query service:
     headers = {}
     for k in request.headers.keys():
@@ -209,9 +227,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
 
     # Query the appropriate Katsu endpoint
     params = { 'page_size': PAGE_SIZE }
-    url = f"{config.KATSU_URL}/v2/authorized/donors/"
-    if primary_site != "":
-        params['primary_site'] = primary_site
+    url = f"{config.KATSU_URL}/v3/authorized/donors/"
     r = safe_get_request_json(requests.get(f"{url}?{urllib.parse.urlencode(params, True)}",
         # Reuse their bearer token
         headers=headers), 'Katsu Donors')
@@ -222,20 +238,32 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
 
     # Will need to look into how to go about this -- ideally we implement this into the SQL in Katsu's side
     filters = [
-        (treatment, f"{config.KATSU_URL}/v2/authorized/treatments/", 'treatment_type'),
-        (chemotherapy, f"{config.KATSU_URL}/v2/authorized/chemotherapies/", 'drug_name'),
-        (immunotherapy, f"{config.KATSU_URL}/v2/authorized/immunotherapies/", 'drug_name'),
-        (hormone_therapy, f"{config.KATSU_URL}/v2/authorized/hormone_therapies/", 'drug_name')
+        (treatment, f"{config.KATSU_URL}/v3/authorized/treatments/", 'treatment_type', None)
     ]
-    for (this_filter, url, param_name) in filters:
+    if type(systemic_therapy_type) is list:
+        for this_type in systemic_therapy_type:
+            filters.append((drug_name, f"{config.KATSU_URL}/v3/authorized/systemic_therapies/", 'drug_name', this_type))
+    else:
+        filters.append((drug_name, f"{config.KATSU_URL}/v3/authorized/systemic_therapies/", 'drug_name', None))
+
+    for (this_filter, url, param_name, therapy_type) in filters:
         if this_filter != "":
-            permissible_donors = get_donors_from_katsu(
+            permissible_donors, _ = get_donors_from_katsu(
                 url,
                 param_name,
                 this_filter,
-                headers
+                headers,
+                therapy_type
             )
             donors = [donor for donor in donors if donor['submitter_donor_id'] in permissible_donors]
+    permissible_donors, primary_diagnoses = get_donors_from_katsu(
+        f"{config.KATSU_URL}/v3/authorized/primary_diagnoses/",
+        'primary_site',
+        primary_site if primary_site != "" else [],
+        headers,
+        keep_all=True
+    )
+    donors = [donor for donor in donors if donor['submitter_donor_id'] in permissible_donors]
 
     # Now we combine this with HTSGet, if any
     genomic_query = []
@@ -245,7 +273,7 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
             htsget = query_htsget(headers, gene, assembly, chrom)
 
             # We need to be able to map specimens, so we'll grab it from Katsu
-            specimen_query_req = requests.get(f"{config.KATSU_URL}/v2/authorized/sample_registrations/?page_size=10000000", headers=headers)
+            specimen_query_req = requests.get(f"{config.KATSU_URL}/v3/authorized/sample_registrations/?page_size=10000000", headers=headers)
             specimen_query = safe_get_request_json(specimen_query_req, 'Katsu sample registrations')
             specimen_mapping = {}
             for specimen in specimen_query['items']:
@@ -299,8 +327,12 @@ def query(treatment="", primary_site="", chemotherapy="", immunotherapy="", horm
         except Exception as ex:
             logger.error(f"Error while reading HTSGet response: {ex}")
 
+    # We also need to cut down the list of primary diagnoses based on the filtered list of donors at this point
+    permissible_donors = set([donor['submitter_donor_id'] for donor in donors])
+    primary_diagnoses = [primary_diagnosis for primary_diagnosis in primary_diagnoses if primary_diagnosis['submitter_donor_id'] in permissible_donors]
+
     # TODO: Cache the above list of donor IDs and summary statistics
-    summary_stats = get_summary_stats(donors, headers)
+    summary_stats = get_summary_stats(donors, primary_diagnoses, headers)
 
     # Determine which part of the filtered donors to send back
     full_data = {
@@ -345,7 +377,7 @@ def genomic_completeness():
 @app.route('/discovery/programs')
 def discovery_programs():
     # Grab all programs from Katsu
-    url = f"{config.KATSU_URL}/v2/discovery/programs/"
+    url = f"{config.KATSU_URL}/v3/discovery/programs/"
     r = safe_get_request_json(requests.get(url), 'Katsu sample registrations')
 
     # Aggregate all of the programs' return values into one value for the entire site
@@ -417,8 +449,8 @@ def discovery_programs():
     return fix_dicts(ret_val), 200
 
 @app.route('/discovery/query')
-def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherapy="", hormone_therapy="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
-    url = f"{config.KATSU_URL}/v2/explorer/donors/"
+def discovery_query(treatment="", primary_site="", systemic_therapy="", systemic_therapy_type="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
+    url = f"{config.KATSU_URL}/v3/explorer/donors/"
     headers = {}
     for k in request.headers.keys():
         headers[k] = request.headers[k]
@@ -427,9 +459,8 @@ def discovery_query(treatment="", primary_site="", chemotherapy="", immunotherap
     param_mapping = [
         (treatment, "treatment_type"),
         (primary_site, "primary_site"),
-        (chemotherapy, "chemotherapy_drug_name"),
-        (immunotherapy, "immunotherapy_drug_name"),
-        (hormone_therapy, "hormone_therapy_drug_name"),
+        (systemic_therapy, "systemic_therapy_drug_name"),
+        (systemic_therapy_type, "systemic_therapy_type"),
         (exclude_cohorts, "exclude_cohorts")
     ]
     params = {}