Skip to content

Commit

Permalink
fix: grouping and column ordering (#153)
Browse files Browse the repository at this point in the history
  • Loading branch information
ireneisdoomed authored Jan 27, 2023
1 parent 6a01be4 commit 645f872
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion modules/GeneBurden.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ def read_gene_burden_curation(curated_data: str) -> DataFrame:
T.StructField('ancestry', T.StringType(), True),
T.StructField('ancestryId', T.StringType(), True),
T.StructField('cohortId', T.StringType(), True),
T.StructField('studyId', T.StringType(), True),
T.StructField('studySampleSize', T.IntegerType(), True),
T.StructField('studyCases', T.IntegerType(), True),
T.StructField('studyCasesWithQualifyingVariants', T.IntegerType(), True),
T.StructField('allelicRequirements', T.StringType(), True),
T.StructField('studyId', T.StringType(), True),
T.StructField('statisticalMethod', T.StringType(), True),
T.StructField('statisticalMethodOverview', T.StringType(), True),
T.StructField('literature', T.StringType(), True),
Expand Down
12 changes: 12 additions & 0 deletions modules/TargetSafety.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,18 @@ def process_aop(aopwiki: str) -> DataFrame:
.withColumn('isHumanApplicable', F.when(F.col('isHumanApplicable') != F.lit(True), F.col("isHumanApplicable")))
# data bug: some events have the substring "NA" at the start - remove it and trim the string
.withColumn('event', F.trim(F.regexp_replace(F.col('event'), '^NA', '')))
# data bug: effects.direction needs to be lowercase because this field is an enum
.withColumn(
'effect',
F.transform(
F.col('effects'),
lambda x: F.struct(
F.lower(x.direction).alias('direction'),
x.dosing.alias('dosing')).alias('effects')
)
)
# I need to convert the biosamples array into a struct so that data is parsed the same way as the rest of the sources
.withColumn('biosample', F.explode('biosamples'))
)

def process_adverse_events(adverse_events: str) -> DataFrame:
Expand Down

0 comments on commit 645f872

Please sign in to comment.