Skip to content

Commit

Permalink
Merge pull request #266 from CanDIG/bugfix/fastquery
Browse files Browse the repository at this point in the history
Performance improvement: Combine multiple queries that are commonly performed together into one endpoint
  • Loading branch information
SonQBChau authored Sep 23, 2024
2 parents eaff22b + 75b7ee7 commit b386884
Show file tree
Hide file tree
Showing 7 changed files with 1,375 additions and 74 deletions.
75 changes: 74 additions & 1 deletion chord_metadata_service/mohpackets/apis/clinical_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
from http import HTTPStatus
from typing import Dict, List

from django.db.models import Prefetch, Q
from django.contrib.postgres.aggregates import ArrayAgg
from django.contrib.postgres.expressions import ArraySubquery
from django.db.models import (
Func,
OuterRef,
Prefetch,
Q,
Subquery,
)
from ninja import Query

from chord_metadata_service.mohpackets.models import (
Expand All @@ -28,6 +36,7 @@
SystemicTherapyFilterSchema,
ComorbidityFilterSchema,
DonorFilterSchema,
DonorExplorerFilterSchema,
ExposureFilterSchema,
FollowUpFilterSchema,
PrimaryDiagnosisFilterSchema,
Expand All @@ -47,6 +56,7 @@
FollowUpModelSchema,
PrimaryDiagnosisModelSchema,
ProgramModelSchema,
QueryDonorSchema,
RadiationModelSchema,
SampleRegistrationModelSchema,
SpecimenModelSchema,
Expand Down Expand Up @@ -168,6 +178,69 @@ def list_donors(request, filters: Query[DonorFilterSchema]):
return Donor.objects.filter(q)


@router.get("/query/", response=List[QueryDonorSchema])
def query_donors(request, filters: DonorExplorerFilterSchema = Query(...)):
    """
    Return donors annotated with sample IDs, primary sites and treatment types.

    Serves the query service. Donors are restricted to the programs listed in
    ``request.read_datasets`` and then narrowed by whichever of the
    ``primary_site``, ``treatment_type``, ``systemic_therapy_drug_name`` and
    ``exclude_cohorts`` filters carry a truthy value.
    """

    class Unnest(Func):
        # Exposes the SQL ``unnest`` function to the ORM.
        contains_subquery = True
        function = "unnest"

    requested = filters.dict()

    # Base queryset: only donors in programs the requester may read.
    readable = Q(program_id__in=request.read_datasets)
    donor_qs = Donor.objects.filter(readable)
    donor_qs = donor_qs.select_related("program_id")
    donor_qs = donor_qs.prefetch_related(
        "treatment_set",
        "primarydiagnosis_set",
        "systemictherapy_set",
        "sampleregistration_set",
    )
    donor_qs = donor_qs.distinct()

    # (filter field, ORM lookup) pairs, applied in this order, each only
    # when the caller supplied a non-empty value.
    include_lookups = (
        ("primary_site", "primarydiagnosis__primary_site__in"),
        ("treatment_type", "treatment__treatment_type__overlap"),
        ("systemic_therapy_drug_name", "systemictherapy__drug_name__in"),
    )
    for field_name, lookup in include_lookups:
        wanted = requested[field_name]
        if wanted:
            donor_qs = donor_qs.filter(**{lookup: wanted})

    excluded_programs = requested["exclude_cohorts"]
    if excluded_programs:
        donor_qs = donor_qs.exclude(program_id__in=excluded_programs)

    # Treatment types are deliberately not de-duplicated: repeated values
    # are kept so that they can be counted.
    per_donor_treatment_types = (
        Treatment.objects.filter(donor_uuid_id=OuterRef("uuid"))
        .annotate(treatment_type_list=Unnest("treatment_type"))
        .values_list("treatment_type_list", flat=True)
    )

    # Null values are left out of the aggregated arrays.
    has_sample_id = ~Q(sampleregistration__submitter_sample_id=None)
    has_primary_site = ~Q(primarydiagnosis__primary_site=None)

    return donor_qs.annotate(
        submitter_sample_ids=ArrayAgg(
            "sampleregistration__submitter_sample_id",
            distinct=True,
            filter=has_sample_id,
        ),
        primary_site=ArrayAgg(
            "primarydiagnosis__primary_site",
            distinct=True,
            filter=has_primary_site,
        ),
        treatment_type=ArraySubquery(Subquery(per_donor_treatment_types)),
    )


def check_filter_donor_with_program(filters):
if filters.submitter_donor_id and not filters.program_id:
error_message = {"error": "submitter_donor_id filter requires program_id"}
Expand Down
2 changes: 1 addition & 1 deletion chord_metadata_service/mohpackets/docs/schema_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3ed28cbab36e7b1fd4c441cd729211035539486a
aff7d68b373affb90ad1bb3a0e50a11371181b9d
Loading

0 comments on commit b386884

Please sign in to comment.