Enhance serving evaluation endpoints (#595)
* Include metric_user_results in the evaluation response; add eval JSON endpoint

* Remove query from the response

* Use the inverse user ID map to return original user IDs in the response

* Update serving test case to remove 'query' and add 'user_result' in the response

* Simplify user ID mapping

* Combine the evaluation and evaluation_json endpoints

* Update abort responses to return plain text instead of HTML

* Add unit test cases

* Update error response for empty data

* Add unit tests for provided-data evaluation

* Update app.py

* Update test_app.py

---------

Co-authored-by: Quoc-Tuan Truong <[email protected]>
darrylong and tqtg authored Mar 1, 2024
1 parent 92a94e3 commit 37db3c9
Showing 2 changed files with 105 additions and 28 deletions.
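As context for the diff below, the combined endpoint can be exercised roughly as follows. This is a minimal sketch assuming a Cornac serving instance listening on localhost:8080 (the port matches the curl example previously in app.py; the requests library and the sample IDs are illustrative, not part of this commit):

# Sketch: calling the combined /evaluate endpoint (hypothetical client code;
# payload and response shapes follow the diff below).
import requests

# Evaluate on feedback the server has accumulated in data/feedback.csv:
resp = requests.post(
    "http://localhost:8080/evaluate",
    json={"metrics": ["RMSE()", "NDCG(k=10)"]},
)
print(resp.json()["result"])       # metric -> average score
print(resp.json()["user_result"])  # metric -> {original user ID: score}

# Or evaluate on explicitly provided (user, item, rating) triples:
resp = requests.post(
    "http://localhost:8080/evaluate",
    json={
        "metrics": ["RMSE()", "Recall(k=5)"],
        "data": [["930", "795", 5], ["195", "795", 3]],
    },
)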
81 changes: 54 additions & 27 deletions cornac/serving/app.py
@@ -26,7 +26,7 @@
 from cornac.metrics import *
 
 try:
-    from flask import Flask, jsonify, request
+    from flask import Flask, jsonify, request, abort, make_response
 except ImportError:
     exit("Flask is required in order to serve models.\n" + "Run: pip3 install Flask")
@@ -185,7 +185,6 @@ def add_feedback():
     return jsonify(data), 200
 
 
-# curl -X POST -H "Content-Type: application/json" -d '{"metrics": ["RMSE()", "NDCG(k=10)"]}' "http://localhost:8080/evaluate"
 @app.route("/evaluate", methods=["POST"])
 def evaluate():
     global model, train_set, metric_classnames
@@ -197,20 +196,59 @@ def evaluate():
         return "Unable to evaluate. 'train_set' is not provided", 400
 
     query = request.json
+    validate_query(query)
 
-    query_metrics = query.get("metrics")
-    rating_threshold = query.get("rating_threshold", 1.0)
     exclude_unknowns = (
         query.get("exclude_unknowns", "true").lower() == "true"
     )  # exclude unknown users/items by default, otherwise specified
 
+    if "data" in query:
+        data = query.get("data")
+    else:
+        data = []
+        data_fpath = "data/feedback.csv"
+        if os.path.exists(data_fpath):
+            reader = Reader()
+            data = reader.read(data_fpath, fmt="UIR", sep=",")
+
+    if not data:
+        response = make_response("No feedback has been provided so far. No data available to evaluate the model.")
+        response.status_code = 400
+        abort(response)
+
+    test_set = Dataset.build(
+        data,
+        fmt="UIR",
+        global_uid_map=train_set.uid_map,
+        global_iid_map=train_set.iid_map,
+        exclude_unknowns=exclude_unknowns,
+    )
+
+    return process_evaluation(test_set, query, exclude_unknowns)
+
+
+def validate_query(query):
+    query_metrics = query.get("metrics")
+
+    if not query_metrics:
+        response = make_response("metrics is required")
+        response.status_code = 400
+        abort(response)
+    elif not isinstance(query_metrics, list):
+        response = make_response("metrics must be an array of metrics")
+        response.status_code = 400
+        abort(response)
+
+
+def process_evaluation(test_set, query, exclude_unknowns):
+    global model, train_set
+
+    rating_threshold = query.get("rating_threshold", 1.0)
     user_based = (
         query.get("user_based", "true").lower() == "true"
     )  # user_based evaluation by default, otherwise specified
 
-    if query_metrics is None:
-        return "metrics is required", 400
-    elif not isinstance(query_metrics, list):
-        return "metrics must be an array of metrics", 400
+    query_metrics = query.get("metrics")
 
     # organize metrics
     metrics = []
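Note: the make_response/abort pattern in validate_query above is what makes the error bodies plain text rather than Flask's default HTML error page — the behavior the "Update abort responses" commit refers to. A standalone sketch of the same pattern, with a hypothetical route not taken from this commit:

# Standalone sketch of the plain-text abort pattern used in validate_query.
from flask import Flask, abort, make_response

app = Flask(__name__)

@app.route("/demo")
def demo():
    # Passing a Response object to abort() raises an HTTPException that
    # sends this exact response, so the client receives a text/plain body
    # instead of Flask's default HTML error page.
    response = make_response("something went wrong")
    response.status_code = 400
    abort(response)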
@@ -226,24 +264,6 @@
 
     rating_metrics, ranking_metrics = BaseMethod.organize_metrics(metrics)
 
-    # read data
-    data = []
-    data_fpath = "data/feedback.csv"
-    if os.path.exists(data_fpath):
-        reader = Reader()
-        data = reader.read(data_fpath, fmt="UIR", sep=",")
-
-    if not len(data):
-        raise ValueError("No data available to evaluate the model.")
-
-    test_set = Dataset.build(
-        data,
-        fmt="UIR",
-        global_uid_map=train_set.uid_map,
-        global_iid_map=train_set.iid_map,
-        exclude_unknowns=exclude_unknowns,
-    )
-
     # evaluation
     result = BaseMethod.eval(
         model=model,
@@ -258,10 +278,17 @@
         verbose=False,
     )
 
+    # map user index back into the original user ID
+    metric_user_results = {}
+    for metric, user_results in result.metric_user_results.items():
+        metric_user_results[metric] = {
+            train_set.user_ids[int(k)]: v for k, v in user_results.items()
+        }
+
     # response
     response = {
         "result": result.metric_avg_results,
-        "query": query,
+        "user_result": metric_user_results,
     }
 
     return jsonify(response), 200
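A note on the user_result mapping above: result.metric_user_results is keyed by Cornac's internal user indices, and train_set.user_ids[int(k)] converts each index back to the raw user ID before the response is built. A self-contained toy illustration (made-up data, mirroring the names in the diff):

# Toy illustration of mapping internal user indices back to original IDs.
user_ids = ["930", "195", "757"]            # position = internal user index
raw_results = {"RMSE": {0: 0.91, 2: 1.20}}  # metric -> {index: score}

metric_user_results = {
    metric: {user_ids[int(k)]: v for k, v in results.items()}
    for metric, results in raw_results.items()
}
assert metric_user_results == {"RMSE": {"930": 0.91, "757": 1.20}}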
52 changes: 51 additions & 1 deletion tests/cornac/serving/test_app.py
@@ -96,9 +96,10 @@ def test_evaluate_json(client):
     response = client.post('/evaluate', json=json_data)
     # assert response.content_type == 'application/json'
     assert response.status_code == 200
-    assert len(response.json['query']['metrics']) == 2
     assert 'RMSE' in response.json['result']
     assert 'Recall@5' in response.json['result']
+    assert 'RMSE' in response.json['user_result']
+    assert 'Recall@5' in response.json['user_result']
 
 
 def test_evalulate_incorrect_get(client):
@@ -110,3 +111,52 @@ def test_evalulate_incorrect_post(client):
     response = client.post('/evaluate')
     assert response.status_code == 415  # bad request, expect json
 
+
+def test_evaluate_missing_metrics(client):
+    json_data = {
+        'metrics': []
+    }
+    response = client.post('/evaluate', json=json_data)
+    assert response.status_code == 400
+    assert response.data == b'metrics is required'
+
+
+def test_evaluate_not_list_metrics(client):
+    json_data = {
+        'metrics': 'RMSE()'
+    }
+    response = client.post('/evaluate', json=json_data)
+    assert response.status_code == 400
+    assert response.data == b'metrics must be an array of metrics'
+
+
+def test_recommend_missing_uid(client):
+    response = client.get('/recommend?k=5')
+    assert response.status_code == 400
+    assert response.data == b'uid is required'
+
+
+def test_evaluate_use_data(client):
+    json_data = {
+        'metrics': ['RMSE()', 'Recall(k=5)'],
+        'data': [['930', '795', 5], ['195', '795', 3]]
+    }
+    response = client.post('/evaluate', json=json_data)
+    # assert response.content_type == 'application/json'
+    assert response.status_code == 200
+    assert 'RMSE' in response.json['result']
+    assert 'Recall@5' in response.json['result']
+    assert 'RMSE' in response.json['user_result']
+    assert 'Recall@5' in response.json['user_result']
+
+
+def test_evaluate_use_data_empty(client):
+    json_data = {
+        'metrics': ['RMSE()', 'Recall(k=5)'],
+        'data': []
+    }
+    response = client.post('/evaluate', json=json_data)
+    assert response.status_code == 400
+    assert response.data == b"No feedback has been provided so far. No data available to evaluate the model."

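These tests rely on a client fixture defined earlier in the module. Under the standard Flask testing pattern it would look roughly like the sketch below; the real fixture lives elsewhere in tests/cornac/serving/test_app.py and may configure the serving app differently (the import path is an assumption based on the file layout):

# Sketch of a typical Flask test-client fixture; the actual fixture in
# test_app.py may differ.
import pytest

from cornac.serving.app import app

@pytest.fixture
def client():
    app.config["TESTING"] = True
    with app.test_client() as client:
        yield client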
