Skip to content

Commit

Permalink
Merge pull request #62 from CanDIG/daisieh/cohorts
Browse files: browse the repository at this point in the history
DIG-1522: change cohort to program
  • Loading branch information
daisieh authored Nov 25, 2024
2 parents 07cca87 + 73e5899 commit f1c1002
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 25 deletions.
12 changes: 6 additions & 6 deletions query_server/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ paths:
- $ref: "#/components/parameters/assemblyParam"
- $ref: "#/components/parameters/pageSizeParam"
- $ref: "#/components/parameters/pageParam"
- $ref: "#/components/parameters/excludeCohortsParam"
- $ref: "#/components/parameters/excludeProgramsParam"
operationId: query_operations.query
responses:
200:
Expand Down Expand Up @@ -91,7 +91,7 @@ paths:
- $ref: "#/components/parameters/chrParam"
- $ref: "#/components/parameters/geneParam"
- $ref: "#/components/parameters/assemblyParam"
- $ref: "#/components/parameters/excludeCohortsParam"
- $ref: "#/components/parameters/excludeProgramsParam"
responses:
200:
description: Summary statistics
Expand All @@ -115,7 +115,7 @@ paths:
$ref: '#/components/schemas/UserInfo'
5XX:
$ref: "#/components/responses/5xxServerError"

components:
parameters:
treatmentParam:
Expand Down Expand Up @@ -178,11 +178,11 @@ components:
example: hg38
schema:
$ref: '#/components/schemas/Field'
excludeCohortsParam:
excludeProgramsParam:
in: query
name: exclude_cohorts
name: exclude_programs
style: pipeDelimited
description: A list of cohorts that will be excluded from results
description: A list of programs that will be excluded from results
example: SYNTHETIC-1
schema:
$ref: '#/components/schemas/Fields'
Expand Down
38 changes: 19 additions & 19 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_summary_stats(donors, primary_sites, treatments):
age_at_diagnosis = {}
donors_by_id = {}
primary_site_count = {}
patients_per_cohort = {}
patients_per_program = {}
treatment_type_count = {}
for donor in donors:
# A donor's date of birth is defined as the (negative) interval between actual DOB and the date of first diagnosis
Expand All @@ -98,7 +98,7 @@ def get_summary_stats(donors, primary_sites, treatments):
add_or_increment(age_at_diagnosis, f'{age}-{age+9} Years')

program_id = donor['program_id']
add_or_increment(patients_per_cohort, program_id)
add_or_increment(patients_per_program, program_id)

# primary sites
if donor['submitter_donor_id'] in primary_sites:
Expand All @@ -116,7 +116,7 @@ def get_summary_stats(donors, primary_sites, treatments):
'age_at_diagnosis': age_at_diagnosis,
'treatment_type_count': treatment_type_count,
'primary_site_count': primary_site_count,
'patients_per_cohort': patients_per_cohort
'patients_per_program': patients_per_program
}

def query_htsget_gene(headers, gene_array):
Expand Down Expand Up @@ -216,7 +216,7 @@ def format_query_response(donors, genomic_query, summary_stats, page, page_size)
return fix_dicts(full_data), 200

@app.route('/query')
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_programs=[], session_id=""):
# Add a service token to the headers so that other services will know this is from the query service:
headers = {}
for k in request.headers.keys():
Expand All @@ -238,7 +238,7 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",
(primary_site, "primary_site"),
(drug_name, "systemic_therapy_drug_name"),
(systemic_therapy_type, "systemic_therapy_type"),
(exclude_cohorts, "exclude_cohorts")
(exclude_programs, "exclude_programs")
]
params = {
'page_size': PAGE_SIZE
Expand All @@ -261,8 +261,8 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",
raise Exception(err_msg)
donors = donors_req.json()['items']

# Filter on excluded cohorts
donors = [donor for donor in donors if donor['program_id'] not in exclude_cohorts]
# Filter on excluded programs
donors = [donor for donor in donors if donor['program_id'] not in exclude_programs]

# Note: We get three extra things from /authorized/query that aren't part of the Donors object:
# 1) submitter_sample_ids
Expand Down Expand Up @@ -297,8 +297,8 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",
# However, that part isn't covered in this PR (it's in DIG-1372 (https://candig.atlassian.net/browse/DIG-1372))
# and does not yet function
# genomic_query_info = htsget['query_info']
# for cohort in genomic_query_info:
# sample_ids = genomic_query_info[cohort]
# for program in genomic_query_info:
# sample_ids = genomic_query_info[program]

htsget_found_donors = {}
responses = htsget['response'] if 'response' in htsget else []
Expand Down Expand Up @@ -354,16 +354,16 @@ def genomic_completeness():
headers[k] = request.headers[k]
headers["X-Service-Token"] = config.SERVICE_TOKEN

cohorts = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/cohorts",
programs = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs",
# Reuse their bearer token
headers=headers), 'HTSGet cohorts')
headers=headers), 'HTSGet programs')
retVal = {}
for program_id in cohorts:
cohort = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/cohorts/{program_id}",
for program_id in programs:
program = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs/{program_id}",
# Reuse their bearer token
headers=headers), 'HTSGet cohort statistics')
headers=headers), 'HTSGet program statistics')
if program_id not in retVal:
retVal[program_id] = cohort["statistics"]
retVal[program_id] = program["statistics"]

return retVal, 200

Expand Down Expand Up @@ -442,7 +442,7 @@ def discovery_programs():
return fix_dicts(ret_val), 200

@app.route('/discovery/query')
def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene="", assembly="hg38", exclude_programs=[]):
url = f"{config.KATSU_URL}/v3/explorer/donors/"
headers = {}
for k in request.headers.keys():
Expand All @@ -453,7 +453,7 @@ def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene=
(treatment, "treatment_type"),
(primary_site, "primary_site"),
(drug_name, "systemic_therapy_drug_name"),
(exclude_cohorts, "exclude_cohorts")
(exclude_programs, "exclude_programs")
]
params = {
"page_size": PAGE_SIZE
Expand Down Expand Up @@ -500,12 +500,12 @@ def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene=
'age_at_diagnosis': {},
'treatment_type_count': {},
'primary_site_count': {},
'patients_per_cohort': {}
'patients_per_program': {}
}
summary_stat_mapping = [
('age_at_diagnosis', 'age_at_diagnosis'),
('treatment_type_count', 'treatment_type'),
('patients_per_cohort', 'program_id'),
('patients_per_program', 'program_id'),
('primary_site_count', 'primary_site')
]
for donor in donors:
Expand Down

0 comments on commit f1c1002

Please sign in to comment.