-
Notifications
You must be signed in to change notification settings - Fork 0
/
14_export_production_assets.py
107 lines (86 loc) · 3.08 KB
/
14_export_production_assets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import copy
import os
import numpy as np
from utils.constants import NOVEL_MODEL_OUTPUT_DIR, PRODUCTION_OUTPUT_DIR
from utils.io import load_object, save_object
from utils.model.novel import NovelModel
from utils.report import Reporter
# Progress reporter used for every status message emitted by this script.
reporter = Reporter()
script_title = (
    "Export (non-PHI) assets needed to deploy the novel model, plus the "
    "lactate and albumin imputation models, in production."
)
reporter.title(script_title)

# Load the pickled production model from the novel-model output directory.
# NOTE(review): presumably written by the step-13 script -- confirm.
reporter.report("Loading pretrained production novel model")
novel_model_path = os.path.join(
    NOVEL_MODEL_OUTPUT_DIR, '13_novel_model_production.pkl'
)
novel_model: NovelModel = load_object(novel_model_path)
reporter.report("Storing production models")
# `assets` is the single dictionary that is eventually exported. Each model
# gets its own sub-dictionary, keyed by a short model name.
assets = {}
# The two imputation models are deep-copied out of their imputers.
# NOTE(review): index 0 is presumably the only (production) fit -- confirm.
assets['albumin'] = {
    'model': copy.deepcopy(novel_model.alb_imputer.imputers[0])
}
assets['lactate'] = {
    'model': copy.deepcopy(novel_model.lac_imputer.imputers[0])
}
# The mortality model is stored by reference (no deep copy), unlike the
# imputation models above.
assets['mortality'] = {'model': novel_model.models[0]}
reporter.report("Consolidating and storing Winsorization thresholds")
# Start from a private copy of the thresholds held by the categorical
# imputer's `swm` member, and register it in the export dictionary.
thresholds = copy.deepcopy(novel_model.cat_imputer.swm.winsor_thresholds[0])
assets['winsor_thresholds'] = thresholds
# Add the thresholds for the albumin and lactate variables, each stored under
# the variable name reported by its own imputer.
for lacalb_imputer in (novel_model.alb_imputer, novel_model.lac_imputer):
    thresholds[lacalb_imputer.lacalb_variable_name] = copy.deepcopy(
        lacalb_imputer.winsor_thresholds[0]
    )
# Overwrite the first element of the age thresholds with 18.0.
# NOTE(review): presumably the lower bound, pinned to the minimum adult age
# -- confirm against the threshold layout.
thresholds['S01AgeOnArrival'][0] = 18.0
# Glasgow Coma Score thresholds are dropped from the exported set.
del thresholds['S03GlasgowComaScore']
reporter.report(
    "Storing format of the input data for each model (not the data itself)"
)
# Example feature tables for each model, used only to record their format.
# NOTE(review): `features` is presumably a pandas DataFrame in every case
# (the code below relies on .dtypes, .describe() and .columns) -- confirm.
model_input_data = {  # This dictionary is NOT for export with other assets
    'albumin': novel_model.alb_imputer._get_features_where_lacalb_missing(
        fold_name='train',
        split_i=0,
        mice_imp_i=0
    ),
    'lactate': novel_model.lac_imputer._get_features_where_lacalb_missing(
        fold_name='train',
        split_i=0,
        mice_imp_i=0
    ),
    # Only the first element of the returned value is kept
    # (presumably the features, with the labels discarded -- confirm).
    'mortality': novel_model.get_features_and_labels(
        fold_name='train',
        split_i=0,
        mice_imp_i=0,
        lac_alb_imp_i=0
    )[0]
}

# The set of thresholded variables does not change inside the loop, so the
# key list is built once rather than once per model.
winsorized_variables = list(assets['winsor_thresholds'].keys())

for model_name, features in model_input_data.items():
    # Columns without a Winsorization threshold. Index.difference already
    # yields the column labels, so there is no need to build a column-subset
    # DataFrame just to read its .columns back.
    # NOTE(review): these are presumably the categorical/binary columns.
    unthresholded_columns = features.columns.difference(winsorized_variables)
    assets[model_name]['input_data'] = {
        'dtypes': copy.deepcopy(features.dtypes),
        'describe': features.describe(),
        # Sorted unique values observed in each unthresholded column.
        'unique_categories': {
            c: np.sort(features[c].unique()) for c in unthresholded_columns
        }
    }
reporter.report("Storing transformer for both imputation models")
# Attach each imputer's first transformer to the matching model entry
# (stored by reference, not copied).
for asset_key, lacalb_imputer in (
    ('albumin', novel_model.alb_imputer),
    ('lactate', novel_model.lac_imputer),
):
    assets[asset_key]['transformer'] = lacalb_imputer.transformers[0]

reporter.report("Storing label encoding for non-binary categorical labels")
# The label encoding is read from a pickle in the novel-model output directory.
label_encoding_path = os.path.join(
    NOVEL_MODEL_OUTPUT_DIR,
    "05_multi_category_levels_with_indications.pkl"
)
assets['label_encoding'] = load_object(label_encoding_path)

reporter.report("Exporting all stored assets")
# Everything collected in `assets` is written out as one pickle file.
export_path = os.path.join(PRODUCTION_OUTPUT_DIR, 'production_assets.pkl')
save_object(assets, export_path)
reporter.last('Done.')