-
Notifications
You must be signed in to change notification settings - Fork 0
/
simple_models.py
101 lines (85 loc) · 3.67 KB
/
simple_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
#classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
# regression models
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
def clf_task(X_train, y_train, X_test, y_test):
    """Train five scikit-learn classifiers and print their test-set scores.

    The classifiers are tried in a fixed order: logistic regression, random
    forest, SVM, k-nearest neighbours and MLP. Each one is fitted on the
    training split and scored on the test split with accuracy and F1. The
    random forest additionally reports its feature importances.

    NOTE(review): ``f1_score`` is called with its default arguments, which in
    scikit-learn means binary averaging -- presumably the labels are binary;
    confirm against the caller.

    Args:
        X_train: Training features, handed to each estimator's ``fit``.
        y_train: Training labels.
        X_test: Test features, handed to each estimator's ``predict``.
        y_test: Test labels; converted to a NumPy array before scoring.

    Returns:
        None. Everything is reported through ``print``.
    """
    # (heading, estimator, print feature importances after scoring?)
    candidates = [
        ("Logistic Regression", LogisticRegression(random_state=0), False),
        ("Random Forest", RandomForestClassifier(random_state=0), True),
        ("SVM", SVC(random_state=0), False),
        ("KNN", KNeighborsClassifier(), False),
        ("MLP", MLPClassifier(random_state=0), False),
    ]

    for position, (heading, estimator, show_importances) in enumerate(candidates):
        # A blank line separates consecutive reports (none after the last).
        if position:
            print()
        print(heading)

        estimator.fit(X_train, y_train)
        predicted = np.array(estimator.predict(X_test))
        expected = np.array(y_test)
        print("Accuracy: ", accuracy_score(expected, predicted))
        print("F1: ", f1_score(expected, predicted))

        if show_importances:
            print("Feature importances: ", estimator.feature_importances_)
def reg_task(x_train, y_train, x_test, y_test, y_test_drug_binary):
    """Fit five regressors and print test-set error metrics per sample group.

    The models are tried in a fixed order: linear regression, SVR, random
    forest, Gaussian process (RBF + white-noise kernel) and MLP. For every
    model MSE, MAE, RMSE and R2 are printed for three groups of test samples:
    all samples ("overall"), the flagged samples ("drug") and the unflagged
    samples ("control"). The random forest additionally reports its feature
    importances.

    Args:
        x_train: Training features, handed to each model's ``fit``.
        y_train: Training targets.
        x_test: Test features, handed to each model's ``predict``.
        y_test: Test targets; converted to a NumPy array before scoring.
        y_test_drug_binary: One flag per test sample. Truthy entries form the
            "drug" group, all remaining entries the "control" group. Any
            array-like of booleans or 0/1 values is accepted.

    Returns:
        None. Everything is reported through ``print``.
    """

    def evaluate_regression_model(model, X_train, y_train, X_test, y_test, y_test_drug_binary):
        """Fit ``model``, predict on ``X_test`` and print metrics per group."""
        model.fit(X_train, y_train)

        # Convert once; these do not change between groups.
        y_pred = np.asarray(model.predict(X_test))
        y_true = np.asarray(y_test)

        # Force a real boolean mask. With 0/1 integers ``~`` is a bitwise NOT
        # (giving -1/-2) and indexing would select positions instead of
        # filtering rows, silently producing wrong group metrics.
        is_drug = np.asarray(y_test_drug_binary, dtype=bool)

        group_masks = (
            ("overall", np.ones(len(y_true), dtype=bool)),
            ("drug", is_drug),
            ("control", ~is_drug),
        )
        for group, mask in group_masks:
            if not mask.any():
                # The metric functions reject empty input; report the empty
                # group and keep going so the remaining results still print.
                print(f"{group.capitalize()} - no samples, metrics skipped")
                continue

            mse = mean_squared_error(y_true[mask], y_pred[mask])
            mae = mean_absolute_error(y_true[mask], y_pred[mask])
            rmse = np.sqrt(mse)
            r2 = r2_score(y_true[mask], y_pred[mask])
            print(f"{group.capitalize()} - MSE: {mse:.4f}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}")

    # (heading, model, print feature importances after scoring?)
    models = [
        ("Linear Regression", LinearRegression(), False),
        ("SVR", SVR(), False),
        ("Random Forest", RandomForestRegressor(random_state=0), True),
        (
            "Gaussian Process",
            GaussianProcessRegressor(kernel=RBF() + WhiteKernel(), random_state=0),
            False,
        ),
        ("MLP", MLPRegressor(random_state=0), False),
    ]

    for position, (heading, model, show_importances) in enumerate(models):
        # A blank line separates consecutive reports (none after the last).
        if position:
            print()
        print(heading)
        evaluate_regression_model(model, x_train, y_train, x_test, y_test, y_test_drug_binary)
        if show_importances:
            print("Feature importances: ", model.feature_importances_)