From c2044aa220f7fe1989ea31358f3736a91e80b4a6 Mon Sep 17 00:00:00 2001 From: Ewen Date: Mon, 23 Dec 2024 02:13:19 +0100 Subject: [PATCH 1/2] corrections erreur Marthe --- .../create_update_excursions_segments.py | 206 +++++++++++------- 1 file changed, 129 insertions(+), 77 deletions(-) diff --git a/backend/bloom/tasks/create_update_excursions_segments.py b/backend/bloom/tasks/create_update_excursions_segments.py index b3a66505..bc028036 100644 --- a/backend/bloom/tasks/create_update_excursions_segments.py +++ b/backend/bloom/tasks/create_update_excursions_segments.py @@ -124,91 +124,143 @@ def run(): for vessel_id in batch["vessel_id"].unique(): df_end = batch.loc[batch["vessel_id"] == vessel_id].copy() df_end.rename(columns={"timestamp": "timestamp_end", - "heading": "heading_at_end", - "speed": "speed_at_end", - "longitude": "end_longitude", - "latitude": "end_latitude" - }, inplace=True) + "heading": "heading_at_end", + "speed": "speed_at_end", + "longitude": "end_longitude", + "latitude": "end_latitude" + }, inplace=True) df_end.sort_values("timestamp_end", inplace=True) df_end.reset_index(drop=True, inplace=True) # get every end entry but the last one ; each one of them will be the start point of a segment if len(df_end)>1: df_start = df_end.iloc[0:-1, :].copy() - else: - df_start=df_end.copy() - for col in df_start.columns: - df_start.rename(columns={col: col.replace("end", "start")}, inplace=True) - vessel_last_segment = pd.DataFrame() - if last_segment.shape[0] > 0: - vessel_last_segment = last_segment.loc[ - last_segment["vessel_id"] == vessel_id, ["mmsi", "timestamp_end", "heading_at_end", "speed_at_end", - "end_position", "excursion_id", "arrival_port_id"]] - vessel_last_segment["start_latitude"] = vessel_last_segment["end_position"].apply(lambda x: x.y) - vessel_last_segment["start_longitude"] = vessel_last_segment["end_position"].apply(lambda x: x.x) - vessel_last_segment.drop("end_position", inplace=True, axis=1) - vessel_last_segment.rename(columns={"timestamp_end": "timestamp_start", - "heading_at_end": "heading_at_start", - "speed_at_end": "speed_at_start", - }, inplace=True) - - if vessel_last_segment.shape[0] > 0: - # if there's a last segment for this vessel, then it's not the first time a position for this vessel is received - is_new_vessel = False - # and it becomes the start point of the first new segment - df_start.loc[-1] = vessel_last_segment.iloc[0] - df_start.sort_index(inplace=True) - df_start.reset_index(drop=True, inplace=True) - # checks if the excursion of the last segment is closed or not - if vessel_last_segment["arrival_port_id"].iloc[0] >= 0: - open_ongoing_excursion = False - else: - open_ongoing_excursion = True - ongoing_excursion_id = int(vessel_last_segment["excursion_id"].iloc[0]) + for col in df_start.columns: + df_start.rename(columns={col: col.replace("end", "start")}, inplace=True) + vessel_last_segment = pd.DataFrame() + if last_segment.shape[0] > 0: + vessel_last_segment = last_segment.loc[ + last_segment["vessel_id"] == vessel_id, ["mmsi", "timestamp_end", "heading_at_end", "speed_at_end", + "end_position", "excursion_id", "arrival_port_id"]] + vessel_last_segment["start_latitude"] = vessel_last_segment["end_position"].apply(lambda x: x.y) + vessel_last_segment["start_longitude"] = vessel_last_segment["end_position"].apply(lambda x: x.x) + vessel_last_segment.drop("end_position", inplace=True, axis=1) + vessel_last_segment.rename(columns={"timestamp_end": "timestamp_start", + "heading_at_end": "heading_at_start", + "speed_at_end": "speed_at_start", + }, inplace=True) + if vessel_last_segment.shape[0] > 0: + # if there's a last segment for this vessel, then it's not the first time a position for this vessel is received + is_new_vessel = False + # and it becomes the start point of the first new segment + df_start.loc[-1] = vessel_last_segment.iloc[0] + df_start.sort_index(inplace=True) + df_start.reset_index(drop=True, inplace=True) + # checks if the excursion of the last segment is closed or not + if vessel_last_segment["arrival_port_id"].iloc[0] >= 0: + open_ongoing_excursion = False + else: + open_ongoing_excursion = True + ongoing_excursion_id = int(vessel_last_segment["excursion_id"].iloc[0]) + else: + # if there's no last segment for this vessel, then it's the first time a position for this vessel is received + is_new_vessel = True + # so we duplicate the starting point to have the first segment while setting it up 1s behind in time (so timestamp_start != timestamp_end) + df_start.loc[-1] = df_start.loc[0] + df_start["timestamp_start"].iloc[-1] += timedelta(0, -1) + df_start.sort_index(inplace=True) + df_start.reset_index(drop=True, inplace=True) + open_ongoing_excursion = False + # concat start and end point together + df = pd.concat([df_start, df_end], axis=1) + # removing segment with same timestamp_start and timestamp_end (no update) + df = df[df["timestamp_start"] != df["timestamp_end"]].copy() + # reseting index + df.reset_index(inplace=True, drop=True) + if (df.shape[0] > 0): + # calculate distance + def get_distance_in_miles(x) -> float: + p1 = (x.start_latitude, x.start_longitude) + p2 = (x.end_latitude, x.end_longitude) + return distance.distance(p1, p2).miles + + df["distance"] = df.apply(get_distance_in_miles, axis=1) + print(df) else: - # if there's no last segment for this vessel, then it's the first time a position for this vessel is received - is_new_vessel = True - # so we duplicate the starting point to have the first segment while setting it up 1s behind in time (so timestamp_start != timestamp_end) - df_start.loc[-1] = df_start.loc[0] - df_start["timestamp_start"].iloc[-1] += timedelta(0, -1) - df_start.sort_index(inplace=True) - df_start.reset_index(drop=True, inplace=True) - open_ongoing_excursion = False - - # concat start and end point together - df = pd.concat([df_start, df_end], axis=1) - - # removing segment with same timestamp_start and timestamp_end (no update) - df = df[df["timestamp_start"] != df["timestamp_end"]].copy() - # reseting index - df.reset_index(inplace=True, drop=True) - if (df.shape[0] > 0): - # calculate distance - def get_distance_in_miles(x) -> float: - p1 = (x.start_latitude, x.start_longitude) - p2 = (x.end_latitude, x.end_longitude) - return distance.distance(p1, p2).miles - - df["distance"] = df.apply(get_distance_in_miles, axis=1) - - # calculate duration in seconds - def get_duration(x) -> float: - return (x.timestamp_end - x.timestamp_start).total_seconds() - - df["segment_duration"] = df.apply(get_duration, axis=1) - - # set default type as AT_SEA - df["type"] = "AT_SEA" - - # set type as default_ais for segment with duration > 35 min - df.loc[df["segment_duration"] >= 2100, "type"] = "DEFAULT_AIS" - - # calculate average speed in knot - df["average_speed"] = df["distance"] / (df["segment_duration"] / 3600) + #df_start=df_end.copy() + vessel_last_segment = pd.DataFrame() + if last_segment.shape[0] > 0: + vessel_last_segment = last_segment.loc[ + last_segment["vessel_id"] == vessel_id, ["mmsi", "timestamp_end", "heading_at_end", "speed_at_end", + "end_position", "excursion_id", "arrival_port_id"]] + vessel_last_segment["start_latitude"] = vessel_last_segment["end_position"].apply(lambda x: x.y) + vessel_last_segment["start_longitude"] = vessel_last_segment["end_position"].apply(lambda x: x.x) + vessel_last_segment.drop("end_position", inplace=True, axis=1) + vessel_last_segment.rename(columns={"timestamp_end": "timestamp_start", + "heading_at_end": "heading_at_start", + "speed_at_end": "speed_at_start", + }, inplace=True) + if vessel_last_segment.shape[0] > 0: + # if there's a last segment for this vessel, then it's not the first time a position for this vessel is received + is_new_vessel = False + df_start= vessel_last_segment + df_start.reset_index(drop=True, inplace=True) + # checks if the excursion of the last segment is closed or not + if vessel_last_segment["arrival_port_id"].iloc[0] >= 0: + open_ongoing_excursion = False + else: + open_ongoing_excursion = True + ongoing_excursion_id = int(vessel_last_segment["excursion_id"].iloc[0]) - # set last_vessel_segment - df["last_vessel_segment"] = 0 - df["last_vessel_segment"].iloc[-1] = 1 + else: + # if there's no last segment for this vessel, then it's the first time a position for this vessel is received + is_new_vessel = True + # so we duplicate the starting point to have the first segment while setting it up 1s behind in time (so timestamp_start != timestamp_end) + df_start = df_end.copy() + for col in df_start.columns: + df_start.rename(columns={col: col.replace("end", "start")}, inplace=True) + df_start["timestamp_start"] += timedelta(0, -1) + df_start.sort_index(inplace=True) + df_start.reset_index(drop=True, inplace=True) + open_ongoing_excursion = False + # concat start and end point together + df = pd.concat([df_start, df_end], axis=1) + # removing segment with same timestamp_start and timestamp_end (no update) + df = df[df["timestamp_start"] != df["timestamp_end"]].copy() + # reseting index + df.reset_index(inplace=True, drop=True) + #print(df) + + if (df.shape[0] > 0): + # calculate distance + def get_distance_in_miles(x) -> float: + p1 = (x.start_latitude, x.start_longitude) + p2 = (x.end_latitude, x.end_longitude) + return distance.distance(p1, p2).miles + + df["distance"] = df.apply(get_distance_in_miles, axis=1) + + # calculate duration in seconds + def get_duration(x) -> float: + return (x.timestamp_end - x.timestamp_start).total_seconds() + + df["segment_duration"] = df.apply(get_duration, axis=1) + + # set default type as AT_SEA + df["type"] = "AT_SEA" + + # set type as default_ais for segment with duration > 35 min + df.loc[df["segment_duration"] >= 2100, "type"] = "DEFAULT_AIS" + + # calculate average speed in knot + df["average_speed"] = df["distance"] / (df["segment_duration"] / 3600) + + # set last_vessel_segment + df["last_vessel_segment"] = 0 + if len(df) >1 : + df["last_vessel_segment"].iloc[-1] = 1 + else : + df["last_vessel_segment"] = 1 # check if segment ends in a port (only for segment with average_speed < maximal_speed_to_check_if_in_port or with type 'DEFAULT_AIS') def get_port(x, session): From 2ad4b9f62ef4653898f59aa70ec8177fad202cb9 Mon Sep 17 00:00:00 2001 From: Ewen Date: Mon, 23 Dec 2024 13:38:29 +0100 Subject: [PATCH 2/2] corrections --- .../create_update_excursions_segments.py | 163 +++++++++--------- 1 file changed, 80 insertions(+), 83 deletions(-) diff --git a/backend/bloom/tasks/create_update_excursions_segments.py b/backend/bloom/tasks/create_update_excursions_segments.py index bc028036..e0706a8d 100644 --- a/backend/bloom/tasks/create_update_excursions_segments.py +++ b/backend/bloom/tasks/create_update_excursions_segments.py @@ -124,11 +124,11 @@ def run(): for vessel_id in batch["vessel_id"].unique(): df_end = batch.loc[batch["vessel_id"] == vessel_id].copy() df_end.rename(columns={"timestamp": "timestamp_end", - "heading": "heading_at_end", - "speed": "speed_at_end", - "longitude": "end_longitude", - "latitude": "end_latitude" - }, inplace=True) + "heading": "heading_at_end", + "speed": "speed_at_end", + "longitude": "end_longitude", + "latitude": "end_latitude" + }, inplace=True) df_end.sort_values("timestamp_end", inplace=True) df_end.reset_index(drop=True, inplace=True) # get every end entry but the last one ; each one of them will be the start point of a segment @@ -185,7 +185,6 @@ def get_distance_in_miles(x) -> float: return distance.distance(p1, p2).miles df["distance"] = df.apply(get_distance_in_miles, axis=1) - print(df) else: #df_start=df_end.copy() vessel_last_segment = pd.DataFrame() @@ -229,97 +228,95 @@ def get_distance_in_miles(x) -> float: df = df[df["timestamp_start"] != df["timestamp_end"]].copy() # reseting index df.reset_index(inplace=True, drop=True) - #print(df) - if (df.shape[0] > 0): - # calculate distance - def get_distance_in_miles(x) -> float: - p1 = (x.start_latitude, x.start_longitude) - p2 = (x.end_latitude, x.end_longitude) - return distance.distance(p1, p2).miles + if (df.shape[0] > 0): + # calculate distance + def get_distance_in_miles(x) -> float: + p1 = (x.start_latitude, x.start_longitude) + p2 = (x.end_latitude, x.end_longitude) + return distance.distance(p1, p2).miles - df["distance"] = df.apply(get_distance_in_miles, axis=1) + df["distance"] = df.apply(get_distance_in_miles, axis=1) - # calculate duration in seconds - def get_duration(x) -> float: - return (x.timestamp_end - x.timestamp_start).total_seconds() + # calculate duration in seconds + def get_duration(x) -> float: + return (x.timestamp_end - x.timestamp_start).total_seconds() - df["segment_duration"] = df.apply(get_duration, axis=1) + df["segment_duration"] = df.apply(get_duration, axis=1) - # set default type as AT_SEA - df["type"] = "AT_SEA" + # set default type as AT_SEA + df["type"] = "AT_SEA" - # set type as default_ais for segment with duration > 35 min - df.loc[df["segment_duration"] >= 2100, "type"] = "DEFAULT_AIS" + # set type as default_ais for segment with duration > 35 min + df.loc[df["segment_duration"] >= 2100, "type"] = "DEFAULT_AIS" - # calculate average speed in knot - df["average_speed"] = df["distance"] / (df["segment_duration"] / 3600) + # calculate average speed in knot + df["average_speed"] = df["distance"] / (df["segment_duration"] / 3600) - # set last_vessel_segment - df["last_vessel_segment"] = 0 - if len(df) >1 : - df["last_vessel_segment"].iloc[-1] = 1 - else : - df["last_vessel_segment"] = 1 + # set last_vessel_segment + df["last_vessel_segment"] = 0 + if len(df) >1 : + df["last_vessel_segment"].iloc[-1] = 1 + else : + df["last_vessel_segment"] = 1 # check if segment ends in a port (only for segment with average_speed < maximal_speed_to_check_if_in_port or with type 'DEFAULT_AIS') - def get_port(x, session): - if x.type == 'DEFAULT_AIS' or x.average_speed < maximal_speed_to_check_if_in_port: - res = port_repository.get_closest_port_in_range(session, x.end_longitude, x.end_latitude, - threshold_distance_to_port) - if res: - (port_id, distance) = res - return port_id - else: - return None + def get_port(x, session): + if x.type == 'DEFAULT_AIS' or x.average_speed < maximal_speed_to_check_if_in_port: + res = port_repository.get_closest_port_in_range(session, x.end_longitude, x.end_latitude, + threshold_distance_to_port) + if res: + (port_id, distance) = res + return port_id else: return None - - df["port"] = df.apply(get_port, axis=1, args=(session,)) - - # get or create new excursion - # logic : - # if segment ends in a port while ongoing excursion is open, then we close the excursion - # else, if the ongoing excursion is open, then we use the ongoing excursion_id for the segment - # else, we create a new excursion whose id will become the ongoing excursion_id for this segment and the future ones - # additionnaly, when we create a new excursion, if the vessel is 'new' then we create an 'empty' excursion - # else, if the first segment of this new excursion is of type 'DEFAULT_AIS', we estimate the time of departure based - # on its ending position, distance traveled and a given average exit speed - df["excursion_id"] = np.NaN - for a in df.index: - if df["port"].iloc[a] is not None and df["port"].iloc[a] >= 0: - if (open_ongoing_excursion): - close_excursion(session, ongoing_excursion_id, int(df["port"].iloc[a]), - df["end_latitude"].iloc[a], - df["end_longitude"].iloc[a], - df["timestamp_end"].iloc[a]) # put the close excursion function here - df["excursion_id"].iloc[a] = ongoing_excursion_id - open_ongoing_excursion = False - nb_closed_excursion += 1 - elif open_ongoing_excursion: + else: + return None + df["port"] = df.apply(get_port, axis=1, args=(session,)) + + # get or create new excursion + # logic : + # if segment ends in a port while ongoing excursion is open, then we close the excursion + # else, if the ongoing excursion is open, then we use the ongoing excursion_id for the segment + # else, we create a new excursion whose id will become the ongoing excursion_id for this segment and the future ones + # additionnaly, when we create a new excursion, if the vessel is 'new' then we create an 'empty' excursion + # else, if the first segment of this new excursion is of type 'DEFAULT_AIS', we estimate the time of departure based + # on its ending position, distance traveled and a given average exit speed + df["excursion_id"] = np.NaN + for a in df.index: + if df["port"].iloc[a] is not None and df["port"].iloc[a] >= 0: + if (open_ongoing_excursion): + close_excursion(session, ongoing_excursion_id, int(df["port"].iloc[a]), + df["end_latitude"].iloc[a], + df["end_longitude"].iloc[a], + df["timestamp_end"].iloc[a]) # put the close excursion function here df["excursion_id"].iloc[a] = ongoing_excursion_id + open_ongoing_excursion = False + nb_closed_excursion += 1 + elif open_ongoing_excursion: + df["excursion_id"].iloc[a] = ongoing_excursion_id + else: + if is_new_vessel: + ongoing_excursion_id = add_excursion(session, int(vessel_id), + df["timestamp_end"].iloc[a], + Point(df["end_longitude"].iloc[a], + df["end_latitude"].iloc[ + a])) + is_new_vessel = False + nb_created_excursion += 1 else: - if is_new_vessel: - ongoing_excursion_id = add_excursion(session, int(vessel_id), - df["timestamp_end"].iloc[a], - Point(df["end_longitude"].iloc[a], - df["end_latitude"].iloc[ - a])) - is_new_vessel = False - nb_created_excursion += 1 - else: - def get_time_of_departure(): - if (df['type'].iloc[a] == 'DEFAULT_AIS'): - return df['timestamp_end'].iloc[a] - timedelta(0, 3600 * df['distance'].iloc[ - a] / average_exit_speed) - else: - return df["timestamp_start"].iloc[a] - - ongoing_excursion_id = add_excursion(session, int(vessel_id), - get_time_of_departure()) # put the create new excursion function here - nb_created_excursion += 1 - open_ongoing_excursion = True - df["excursion_id"].iloc[a] = ongoing_excursion_id + def get_time_of_departure(): + if (df['type'].iloc[a] == 'DEFAULT_AIS'): + return df['timestamp_end'].iloc[a] - timedelta(0, 3600 * df['distance'].iloc[ + a] / average_exit_speed) + else: + return df["timestamp_start"].iloc[a] + + ongoing_excursion_id = add_excursion(session, int(vessel_id), + get_time_of_departure()) # put the create new excursion function here + nb_created_excursion += 1 + open_ongoing_excursion = True + df["excursion_id"].iloc[a] = ongoing_excursion_id # concat the result for current vessel in the result dataframe if (df.shape[0] > 0): result = pd.concat([result, df[df["excursion_id"] >= 0]], axis=0)