Commit 913605f

Merge pull request #5 from zakraicik/evaluate-model

Evaluate model
zakraicik authored Sep 16, 2024
2 parents 79510b5 + be0f0d9 commit 913605f
Showing 8 changed files with 282 additions and 29 deletions.
14 changes: 7 additions & 7 deletions poetry.lock

Some generated files are not rendered by default.

Binary file added special_train/eval/actual_vs_expected.png
Binary file added special_train/eval/confidence_by_timestep.png
Binary file added special_train/eval/error_distribution_by_timestep.png
Binary file added special_train/eval/error_scatter.png
198 changes: 198 additions & 0 deletions special_train/eval/eval.py
@@ -0,0 +1,198 @@
import os
import boto3
import numpy as np
import matplotlib.pyplot as plt

from scipy.stats import t
from datetime import datetime
from special_train.config import (
    AWS_REGION,
    S3_ETHEREUM_FORECAST_BUCKET,
    S3_X_TRAIN_KEY,
    S3_Y_TRAIN_KEY,
    S3_X_VAL_KEY,
    S3_Y_VAL_KEY,
    S3_X_TEST_KEY,
    S3_Y_TEST_KEY,
    N,
)
from special_train.utils import (
    load_model_from_s3,
    load_numpy_from_s3,
    load_object_from_s3,
)

aws_access_key = os.environ.get("AWS_ACCESS_KEY")
aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")

session = boto3.Session(
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_access_key,
    region_name=AWS_REGION,
)

aws_secret_client = session.client(service_name="secretsmanager")
aws_s3_client = session.client(service_name="s3")

def plot_actual_vs_expected(
    targets, predictions, N, filename="special_train/eval/actual_vs_expected.png"
):
    plt.figure(figsize=(12, 6))

    plt.plot(targets, label="Actual Close Price", linestyle="-", alpha=0.7)

    # Overlay each N-step forecast window as a separate dashed red segment
    forecast_steps = N
    for i in range(0, len(predictions) - forecast_steps + 1, forecast_steps):
        plt.plot(
            range(i, i + forecast_steps),
            predictions[i : i + forecast_steps],
            linestyle="--",
            alpha=0.6,
            color="red",
        )

    plt.xlabel("Time")
    plt.ylabel("Close Price")
    plt.title("Actual vs. Predicted Close Price (Multistep)")

    plt.savefig(os.path.join(os.getcwd(), filename), bbox_inches="tight")
    plt.close()


def plot_error_scatter(
    targets, predictions, filename="special_train/eval/error_scatter.png"
):
    plt.figure(figsize=(8, 8))

    plt.scatter(predictions, targets, alpha=0.5, label="Predicted vs. Actual")

    # Reference line y = x: points on it are perfect predictions
    min_val = min(np.min(predictions), np.min(targets))
    max_val = max(np.max(predictions), np.max(targets))

    plt.plot([min_val, max_val], [min_val, max_val], "r--", label="Perfect Match")

    plt.xlabel("Predicted Close Price")
    plt.ylabel("Actual Close Price")
    plt.title("Predicted vs. Actual Close Price")
    plt.legend()

    plt.savefig(os.path.join(os.getcwd(), filename), bbox_inches="tight")
    plt.close()


def plot_confidence_by_timestep(
    targets,
    predictions,
    confidence_level=0.95,
    filename="special_train/eval/confidence_by_timestep.png",
):
    num_samples, N = predictions.shape

    absolute_errors = np.abs(targets - predictions)

    # Mean absolute error at each forecast horizon, averaged over samples
    mae_by_step = np.mean(absolute_errors, axis=0)

    # Standard error of the mean: s / sqrt(n)
    std_error = np.std(absolute_errors, axis=0, ddof=1) / np.sqrt(num_samples)

    degrees_freedom = num_samples - 1

    # Two-sided critical value from the t-distribution
    t_crit = np.abs(t.ppf((1 - confidence_level) / 2, degrees_freedom))

    margin_of_error = t_crit * std_error

    ci_lower = mae_by_step - margin_of_error
    ci_upper = mae_by_step + margin_of_error

    plt.figure(figsize=(12, 6))
    timesteps = np.arange(1, N + 1)

    plt.plot(
        timesteps,
        mae_by_step,
        marker="o",
        linestyle="-",
        color="b",
        label="Mean Absolute Error",
    )

    plt.fill_between(
        timesteps,
        ci_lower,
        ci_upper,
        color="b",
        alpha=0.2,
        label=f"{int(confidence_level * 100)}% Confidence Interval",
    )

    plt.xlabel("Time Step Ahead")
    plt.ylabel("Mean Absolute Error (MAE)")
    plt.title("Mean Absolute Error by Time Step with Confidence Intervals")
    plt.legend()
    plt.grid(True)

    plt.savefig(os.path.join(os.getcwd(), filename), bbox_inches="tight")
    plt.close()


def plot_error_distribution_by_timestep(
    targets,
    predictions,
    filename="special_train/eval/error_distribution_by_timestep.png",
):
    num_samples, N = predictions.shape

    errors = targets - predictions

    plt.figure(figsize=(12, 8))

    for step in range(N):
        # Ceil-divide so the grid still has enough rows when N is odd
        plt.subplot((N + 1) // 2, 2, step + 1)
        plt.hist(
            errors[:, step],
            bins=30,
            edgecolor="k",
            alpha=0.7,
        )
        plt.title(f"Error Distribution for Step {step + 1}")
        plt.xlabel("Error")
        plt.ylabel("Frequency")

    plt.tight_layout()

    plt.savefig(os.path.join(os.getcwd(), filename), bbox_inches="tight")
    plt.close()


if __name__ == "__main__":
    X_test = load_numpy_from_s3(
        aws_s3_client, S3_ETHEREUM_FORECAST_BUCKET, S3_X_TEST_KEY
    )

    y_test = load_numpy_from_s3(
        aws_s3_client, S3_ETHEREUM_FORECAST_BUCKET, S3_Y_TEST_KEY
    )

    model = load_model_from_s3(aws_s3_client, S3_ETHEREUM_FORECAST_BUCKET)

    target_scaler = load_object_from_s3(
        aws_s3_client, S3_ETHEREUM_FORECAST_BUCKET, "modeling_utils/target_scaler.pkl"
    )

    # Undo target scaling so the plots are in original price units
    inversed_targets = target_scaler.inverse_transform(y_test)

    predicted_close = model.predict(X_test)
    inversed_predicted_close = target_scaler.inverse_transform(predicted_close)

    plot_actual_vs_expected(inversed_targets, inversed_predicted_close, N)

    plot_error_scatter(inversed_targets, inversed_predicted_close)

    plot_confidence_by_timestep(inversed_targets, inversed_predicted_close)

    plot_error_distribution_by_timestep(inversed_targets, inversed_predicted_close)
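A quick way to sanity-check these plotting functions without S3 access is to drive them with synthetic arrays of the shape eval.py expects: (num_samples, N) for both targets and predictions. This smoke test is a hypothetical sketch, not part of the commit; it assumes the package is importable as special_train.eval.eval, and the output filenames are placeholders:

import numpy as np
from special_train.eval.eval import (
    plot_confidence_by_timestep,
    plot_error_distribution_by_timestep,
    plot_error_scatter,
)

rng = np.random.default_rng(0)
num_samples, horizon = 200, 4  # horizon plays the role of N

# Synthetic "actuals" plus noisy "predictions" around them
targets = rng.normal(2000, 50, size=(num_samples, horizon))
predictions = targets + rng.normal(0, 10, size=(num_samples, horizon))

plot_error_scatter(targets, predictions, filename="smoke_error_scatter.png")
plot_confidence_by_timestep(targets, predictions, filename="smoke_ci.png")
plot_error_distribution_by_timestep(targets, predictions, filename="smoke_dist.png")

With tight noise like this, the MAE-by-timestep curve should sit near 8 (the mean absolute deviation of N(0, 10)) with a narrow confidence band.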
37 changes: 15 additions & 22 deletions special_train/train/train.py
@@ -40,13 +40,13 @@
    patience=10, monitor="val_loss", restore_best_weights=True
)

-reduce_lr = ReduceLROnPlateau(
-    monitor="val_loss",
-    factor=0.5,
-    patience=5,
-    min_lr=1e-6,
-    verbose=1,
-)
+# reduce_lr = ReduceLROnPlateau(
+#     monitor="val_loss",
+#     factor=0.5,
+#     patience=5,
+#     min_lr=1e-6,
+#     verbose=1,
+# )


def build_model(hp):
@@ -115,7 +115,7 @@ def tune_model(
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
-        callbacks=[early_stopping, reduce_lr],
+        callbacks=[early_stopping],
    )

    best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
@@ -144,21 +144,14 @@
        y_val,
        epochs=100,
        batch_size=32,
-        max_trials=1,
+        max_trials=20,
        executions_per_trial=1,
    )

-    model = tuner.hypermodel.build(best_hp)
-
-    X_combined = np.concatenate((X_train, X_val), axis=0)
-    y_combined = np.concatenate((y_train, y_val), axis=0)
-
-    history = model.fit(
-        X_combined,
-        y_combined,
-        epochs=100,
-        batch_size=32,
-        validation_split=0.1,
-        shuffle=False,
-        callbacks=[early_stopping, reduce_lr],
-    )
+    best_model = tuner.get_best_models(num_models=1)[0]
+    best_model.compile(
+        optimizer=Adam(learning_rate=best_hp.get("learning_rate"), clipvalue=1),
+        loss="mae",
+        metrics=["mae", "mse", "mape"],
+    )
+    save_model_to_s3(best_model, aws_s3_client, S3_ETHEREUM_FORECAST_BUCKET)
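Net effect of this hunk: instead of rebuilding the best hyperparameter configuration and retraining it from scratch on the combined train+val data, the script now takes the already-trained best model straight from the tuner, re-compiles it, and uploads it. A minimal sketch of the keras-tuner flow this relies on — assuming a RandomSearch tuner and the build_model/early_stopping objects defined earlier in train.py; the directory and project names are placeholders:

import keras_tuner as kt

tuner = kt.RandomSearch(
    build_model,
    objective="val_loss",
    max_trials=20,
    executions_per_trial=1,
    directory="tuning",           # placeholder
    project_name="eth_forecast",  # placeholder
)
tuner.search(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
# get_best_models returns models with the weights they learned during the
# search, so no additional fit() is required before saving
best_model = tuner.get_best_models(num_models=1)[0]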
62 changes: 62 additions & 0 deletions special_train/utils.py
@@ -5,6 +5,9 @@
import numpy as np
import pickle
import io
+import os
+import tempfile
+import tensorflow as tf

from datetime import datetime
from io import BytesIO
@@ -89,6 +92,65 @@ def save_object_to_s3(s3_client, obj, bucket, key):
    s3_client.put_object(Bucket=bucket, Key=key, Body=serialized_obj)


def load_object_from_s3(s3_client, bucket, key):
    response = s3_client.get_object(Bucket=bucket, Key=key)
    serialized_obj = response["Body"].read()
    obj = pickle.loads(serialized_obj)
    return obj


def save_model_to_s3(model, aws_s3_client, bucket):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    with tempfile.TemporaryDirectory() as temp_dir:
        # Keras needs a real path to write the .keras archive before upload
        temp_path = os.path.join(temp_dir, f"{timestamp}.keras")
        model.save(temp_path)

        s3_file = f"models/{timestamp}.keras"
        aws_s3_client.upload_file(temp_path, bucket, s3_file)


def _most_recent_model(aws_s3_client, bucket):
    response = aws_s3_client.list_objects_v2(Bucket=bucket, Prefix="models/")

    if "Contents" not in response:
        return None

    # Map each parseable timestamp to its key; skip keys that don't match
    model_files = {}
    for obj in response["Contents"]:
        obj_key = obj["Key"]
        try:
            date_str = obj_key.split(".")[0].split("/")[-1]
            model_date = datetime.strptime(date_str, "%Y%m%d_%H%M%S")
            model_files[model_date] = obj_key
        except ValueError:
            continue

    if model_files:
        most_recent_date = max(model_files.keys())
        most_recent_file = model_files[most_recent_date]
        return most_recent_file.split("/")[-1]
    else:
        return None


def load_model_from_s3(aws_s3_client, bucket, model_name=None):
    # Default to the most recently uploaded model
    if model_name is None:
        model_name = _most_recent_model(aws_s3_client, bucket)
    if model_name is None:
        raise ValueError(f"No models found under s3://{bucket}/models/")

    with tempfile.NamedTemporaryFile(suffix=".keras", delete=False) as temp_file:
        key = f"models/{model_name}"
        aws_s3_client.download_file(bucket, key, temp_file.name)
        model = tf.keras.models.load_model(temp_file.name)
        os.unlink(temp_file.name)
        return model


def load_raw_data(aws_s3_client, bucket, key):
    logger.info("Downloading raw data from S3...")
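Taken together, save_model_to_s3 and load_model_from_s3 give a timestamp-keyed round trip under the bucket's models/ prefix. A hypothetical usage sketch — the bucket name and the tiny model are placeholders, and credentials are assumed to come from the environment as elsewhere in this repo:

import boto3
import tensorflow as tf
from special_train.utils import load_model_from_s3, save_model_to_s3

s3 = boto3.Session().client("s3")
bucket = "my-forecast-bucket"  # placeholder

# Any compiled Keras model works; this one is just for illustration
model = tf.keras.Sequential(
    [tf.keras.layers.Input(shape=(4,)), tf.keras.layers.Dense(1)]
)
model.compile(optimizer="adam", loss="mae")

save_model_to_s3(model, s3, bucket)        # uploads models/<timestamp>.keras
restored = load_model_from_s3(s3, bucket)  # fetches the newest timestamp

Because keys are named by %Y%m%d_%H%M%S timestamps, _most_recent_model can pick the latest model with a simple max() over parsed dates rather than relying on S3 listing order.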
