Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DIG-1522: change cohort to program #62

Merged
merged 2 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions query_server/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ paths:
- $ref: "#/components/parameters/assemblyParam"
- $ref: "#/components/parameters/pageSizeParam"
- $ref: "#/components/parameters/pageParam"
- $ref: "#/components/parameters/excludeCohortsParam"
- $ref: "#/components/parameters/excludeProgramsParam"
operationId: query_operations.query
responses:
200:
Expand Down Expand Up @@ -91,7 +91,7 @@ paths:
- $ref: "#/components/parameters/chrParam"
- $ref: "#/components/parameters/geneParam"
- $ref: "#/components/parameters/assemblyParam"
- $ref: "#/components/parameters/excludeCohortsParam"
- $ref: "#/components/parameters/excludeProgramsParam"
responses:
200:
description: Summary statistics
Expand All @@ -115,7 +115,7 @@ paths:
$ref: '#/components/schemas/UserInfo'
5XX:
$ref: "#/components/responses/5xxServerError"

components:
parameters:
treatmentParam:
Expand Down Expand Up @@ -178,11 +178,11 @@ components:
example: hg38
schema:
$ref: '#/components/schemas/Field'
excludeCohortsParam:
excludeProgramsParam:
in: query
name: exclude_cohorts
name: exclude_programs
style: pipeDelimited
description: A list of cohorts that will be excluded from results
description: A list of programs that will be excluded from results
example: SYNTHETIC-1
schema:
$ref: '#/components/schemas/Fields'
Expand Down
38 changes: 19 additions & 19 deletions query_server/query_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_summary_stats(donors, primary_sites, treatments):
age_at_diagnosis = {}
donors_by_id = {}
primary_site_count = {}
patients_per_cohort = {}
patients_per_program = {}
treatment_type_count = {}
for donor in donors:
# A donor's date of birth is defined as the (negative) interval between actual DOB and the date of first diagnosis
Expand All @@ -98,7 +98,7 @@ def get_summary_stats(donors, primary_sites, treatments):
add_or_increment(age_at_diagnosis, f'{age}-{age+9} Years')

program_id = donor['program_id']
add_or_increment(patients_per_cohort, program_id)
add_or_increment(patients_per_program, program_id)

# primary sites
if donor['submitter_donor_id'] in primary_sites:
Expand All @@ -116,7 +116,7 @@ def get_summary_stats(donors, primary_sites, treatments):
'age_at_diagnosis': age_at_diagnosis,
'treatment_type_count': treatment_type_count,
'primary_site_count': primary_site_count,
'patients_per_cohort': patients_per_cohort
'patients_per_program': patients_per_program
}

def query_htsget_gene(headers, gene_array):
Expand Down Expand Up @@ -216,7 +216,7 @@ def format_query_response(donors, genomic_query, summary_stats, page, page_size)
return fix_dicts(full_data), 200

@app.route('/query')
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_cohorts=[], session_id=""):
def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="", chrom="", gene="", page=0, page_size=10, assembly="hg38", exclude_programs=[], session_id=""):
# Add a service token to the headers so that other services will know this is from the query service:
headers = {}
for k in request.headers.keys():
Expand All @@ -238,7 +238,7 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",
(primary_site, "primary_site"),
(drug_name, "systemic_therapy_drug_name"),
(systemic_therapy_type, "systemic_therapy_type"),
(exclude_cohorts, "exclude_cohorts")
(exclude_programs, "exclude_programs")
]
params = {
'page_size': PAGE_SIZE
Expand All @@ -261,8 +261,8 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",
raise Exception(err_msg)
donors = donors_req.json()['items']

# Filter on excluded cohorts
donors = [donor for donor in donors if donor['program_id'] not in exclude_cohorts]
# Filter on excluded programs
donors = [donor for donor in donors if donor['program_id'] not in exclude_programs]

# Note: We get three extra things from /authorized/query that aren't part of the Donors object:
# 1) submitter_sample_ids
Expand Down Expand Up @@ -297,8 +297,8 @@ def query(treatment="", primary_site="", drug_name="", systemic_therapy_type="",
# However, that part isn't covered in this PR (it's in DIG-1372 (https://candig.atlassian.net/browse/DIG-1372))
# and does not yet function
# genomic_query_info = htsget['query_info']
# for cohort in genomic_query_info:
# sample_ids = genomic_query_info[cohort]
# for program in genomic_query_info:
# sample_ids = genomic_query_info[program]

htsget_found_donors = {}
responses = htsget['response'] if 'response' in htsget else []
Expand Down Expand Up @@ -354,16 +354,16 @@ def genomic_completeness():
headers[k] = request.headers[k]
headers["X-Service-Token"] = config.SERVICE_TOKEN

cohorts = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/cohorts",
programs = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs",
# Reuse their bearer token
headers=headers), 'HTSGet cohorts')
headers=headers), 'HTSGet programs')
retVal = {}
for program_id in cohorts:
cohort = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/cohorts/{program_id}",
for program_id in programs:
program = safe_get_request_json(requests.get(f"{config.HTSGET_URL}/ga4gh/drs/v1/programs/{program_id}",
# Reuse their bearer token
headers=headers), 'HTSGet cohort statistics')
headers=headers), 'HTSGet program statistics')
if program_id not in retVal:
retVal[program_id] = cohort["statistics"]
retVal[program_id] = program["statistics"]

return retVal, 200

Expand Down Expand Up @@ -442,7 +442,7 @@ def discovery_programs():
return fix_dicts(ret_val), 200

@app.route('/discovery/query')
def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene="", assembly="hg38", exclude_cohorts=[]):
def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene="", assembly="hg38", exclude_programs=[]):
url = f"{config.KATSU_URL}/v3/explorer/donors/"
headers = {}
for k in request.headers.keys():
Expand All @@ -453,7 +453,7 @@ def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene=
(treatment, "treatment_type"),
(primary_site, "primary_site"),
(drug_name, "systemic_therapy_drug_name"),
(exclude_cohorts, "exclude_cohorts")
(exclude_programs, "exclude_programs")
]
params = {
"page_size": PAGE_SIZE
Expand Down Expand Up @@ -500,12 +500,12 @@ def discovery_query(treatment="", primary_site="", drug_name="", chrom="", gene=
'age_at_diagnosis': {},
'treatment_type_count': {},
'primary_site_count': {},
'patients_per_cohort': {}
'patients_per_program': {}
}
summary_stat_mapping = [
('age_at_diagnosis', 'age_at_diagnosis'),
('treatment_type_count', 'treatment_type'),
('patients_per_cohort', 'program_id'),
('patients_per_program', 'program_id'),
('primary_site_count', 'primary_site')
]
for donor in donors:
Expand Down