From 770072e721f28180b2f96861d255e2ea31bedf64 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Mon, 11 Dec 2023 15:44:24 +0100
Subject: [PATCH 01/11] fix: strip "/" from filename to avoid having '/.html' as filename

---
 app/crawler/middlewares.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/crawler/middlewares.py b/app/crawler/middlewares.py
index 7241e9b..18b133d 100644
--- a/app/crawler/middlewares.py
+++ b/app/crawler/middlewares.py
@@ -48,7 +48,7 @@ def from_crawler(cls, crawler):
     def _format_file_path(self, response, spider) -> Path:
         domain = spider.allowed_domains[0]
         base_file_path = f"/{settings.LOCAL_FILES_PATH.strip('/')}/{spider.crawl_process.id}"
-        file_name = response.url.split(f"{domain}")[-1].lstrip('/')
+        file_name = response.url.split(f"{domain}")[-1].strip('/')
         if not file_name.endswith(".html"):
             file_name = f"{file_name}.html"
         if file_name == ".html":

From 2188a83e8abe4a306247d8c80fe29414b6c02aff Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Mon, 11 Dec 2023 16:02:50 +0100
Subject: [PATCH 02/11] fix: stop creating sub-process for html crawl

Avoid the error "daemonic processes are not allowed to have children" when we use prefork.
---
 app/celery_broker/crawler_utils.py |  3 +--
 app/celery_broker/tasks.py         | 10 +---------
 docker-compose.yml                 |  2 ++
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/app/celery_broker/crawler_utils.py b/app/celery_broker/crawler_utils.py
index 6b31e6d..9f3059f 100644
--- a/app/celery_broker/crawler_utils.py
+++ b/app/celery_broker/crawler_utils.py
@@ -33,11 +33,10 @@ def init_crawler_settings(crawl_process: CrawlProcess):
     return settings
 
 
-def start_crawler_process(crawl_process: CrawlProcess, results: dict):
+def start_crawler_process(crawl_process: CrawlProcess):
     process = CrawlerProcess(settings=init_crawler_settings(crawl_process))
     process.crawl(MenesrSpider, crawl_process=crawl_process)
     process.start()
-    results["metadata"] = dict(crawl_process.metadata.items())
 
 
 def set_html_crawl_status(crawl: CrawlModel, request_id: str, status: ProcessStatus):
diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py
index ec55c71..a9cac78 100644
--- a/app/celery_broker/tasks.py
+++ b/app/celery_broker/tasks.py
@@ -42,15 +42,7 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
     crawl_process = CrawlProcess.from_model(crawl)
 
     try:
-        with Manager() as manager:
-            shared_dict = manager.dict()
-            p = Process(
-                target=start_crawler_process,
-                kwargs={"crawl_process": crawl_process, "results": shared_dict},
-            )
-            p.start()
-            p.join()  # TODO define and add a timeout
-            crawl_process.metadata.update(shared_dict["metadata"])
+        start_crawler_process(crawl_process=crawl_process)
     except Exception as e:
         logger.error(f"Error while crawling html files: {e}")
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)
diff --git a/docker-compose.yml b/docker-compose.yml
index 1d5cbc5..88aa510 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -32,6 +32,8 @@ services:
     build: .
     env_file: .env
     command: watchfiles --filter python 'celery -A celery_broker.main.celery_app worker -l info -P solo -n crawl_worker -Q crawl_queue,finalize_crawl_queue'
+    # An example with prefork.
+    #command: watchfiles --filter python 'celery -A celery_broker.main.celery_app worker -l info -P prefork -c 4 -n crawl_worker -Q crawl_queue,finalize_crawl_queue'
     volumes:
       - ./app:/open-crawler/app
       - local_files:${LOCAL_FILES_PATH}

From e26895c6fc23f401e77ea0af0ede581e3d775637 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Tue, 12 Dec 2023 15:15:03 +0100
Subject: [PATCH 03/11] Fix: MSPDT-76 - update finished_at

---
 app/celery_broker/tasks.py | 9 +++++----
 app/repositories/crawls.py | 7 +++++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py
index a9cac78..c05ea53 100644
--- a/app/celery_broker/tasks.py
+++ b/app/celery_broker/tasks.py
@@ -2,7 +2,6 @@
 import os
 import pathlib
 import shutil
-from multiprocessing import Process, Manager
 from typing import Optional
 
 # Local imports
@@ -124,9 +123,11 @@ def finalize_crawl_process(self, crawl_process: Optional[CrawlProcess], crawl: C
     current_crawl = crawls.get(crawl_id=crawl.id)
 
     if current_crawl.status == ProcessStatus.STARTED:
-        crawls.update_status(
-            crawl_id=crawl.id, status=ProcessStatus.SUCCESS
-        )
+        current_crawl.status = ProcessStatus.SUCCESS
+
+    crawls.update_status(
+        crawl_id=crawl.id, status=current_crawl.status, final_status=True
+    )
 
     websites.store_last_crawl(
         website_id=crawl.website_id,
diff --git a/app/repositories/crawls.py b/app/repositories/crawls.py
index b16d7dd..a9cf48d 100644
--- a/app/repositories/crawls.py
+++ b/app/repositories/crawls.py
@@ -63,11 +63,14 @@ def update(self, data: CrawlModel):
             },
         )
 
-    def update_status(self, crawl_id: str, status: ProcessStatus):
+    def update_status(self, crawl_id: str, status: ProcessStatus, final_status: bool = False):
         update_dict = {"status": status}
         if status == ProcessStatus.STARTED:
             update_dict["started_at"] = french_datetime()
-        if status == ProcessStatus.SUCCESS:
+        elif status == ProcessStatus.SUCCESS:
+            update_dict["finished_at"] = french_datetime()
+        # In the finalize task, we should update the finished_at field regardless of the status
+        if final_status:
             update_dict["finished_at"] = french_datetime()
         self.collection.update_one(
             filter={"id": crawl_id},

From 9ed4077d79fd718eac5796c8354209f723da05f1 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Wed, 13 Dec 2023 17:27:43 +0100
Subject: [PATCH 04/11] MSPDT-80: update crawl process status if a metadata task fails

---
 app/celery_broker/metadata_utils.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/celery_broker/metadata_utils.py b/app/celery_broker/metadata_utils.py
index 477873c..ef54133 100644
--- a/app/celery_broker/metadata_utils.py
+++ b/app/celery_broker/metadata_utils.py
@@ -25,6 +25,11 @@ def handle_metadata_result(
             task_name=metadata_type,
             task=task,
         )
+        # When the metadata task fails, the crawl process is considered partially failed,
+        # because metadata tasks are executed only if the html_crawl is successful.
+        crawls.update_status(
+            crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR
+        )
         logger.error(f"{metadata_type} failed.")
         return crawl_process
     store_metadata_result(crawl_process, result, metadata_type)

From 6d0fd2003324c757619b6b9c676ce109a71cb8fa Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 10:22:19 +0100
Subject: [PATCH 05/11] Revert "MSPDT-80: update crawl process status if a metadata task fails"

This reverts commit 9ed4077d79fd718eac5796c8354209f723da05f1.
---
 app/celery_broker/metadata_utils.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/app/celery_broker/metadata_utils.py b/app/celery_broker/metadata_utils.py
index ef54133..477873c 100644
--- a/app/celery_broker/metadata_utils.py
+++ b/app/celery_broker/metadata_utils.py
@@ -25,11 +25,6 @@ def handle_metadata_result(
             task_name=metadata_type,
             task=task,
         )
-        # When the metadata task fails, the crawl process is considered partially failed,
-        # because metadata tasks are executed only if the html_crawl is successful.
-        crawls.update_status(
-            crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR
-        )
         logger.error(f"{metadata_type} failed.")
         return crawl_process
     store_metadata_result(crawl_process, result, metadata_type)

From f448e867c1a9e9d42b28b48912c37f1a66eacf12 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 14:12:45 +0100
Subject: [PATCH 06/11] Allow the first url of the response (handle permanent redirections)

---
 app/crawler/middlewares.py | 16 ++++++++++++++++
 app/crawler/spider.py      | 20 +++++++-------------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/app/crawler/middlewares.py b/app/crawler/middlewares.py
index 18b133d..1ed6ef7 100644
--- a/app/crawler/middlewares.py
+++ b/app/crawler/middlewares.py
@@ -3,6 +3,7 @@
 # See documentation in:
 # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
 from pathlib import Path
+from urllib.parse import urlparse
 
 from app.config import settings
 
@@ -63,12 +64,27 @@ def _save_html_locally(self, response, spider):
         file_path.write_text(response.text)
 
     def process_response(self, request, response, spider):
+
         if self.page_limit != 0 and self.current_page_count >= self.page_limit:
             raise IgnoreRequest(
                 f"Page limit reached. Ignoring request {request}"
             )
+
         if request.url.endswith("robots.txt"):
             return response
+
+        if spider.first_real_url is None:
+            # We should set the allowed_url and allowed_domains only once, for the first request.
+            # It is useful when we have a permanent redirection in the first request.
+            spider.first_real_url = response.url
+            parsed_url = urlparse(spider.first_real_url)
+            if parsed_url.path:
+                spider.allowed_url = parsed_url.path
+            else:
+                spider.allowed_url = parsed_url.netloc
+            spider.allowed_domains = [parsed_url.netloc]
+
+
         if response.status == 200:
             self.current_page_count += 1
             self._save_html_locally(response, spider)
diff --git a/app/crawler/spider.py b/app/crawler/spider.py
index e56ee28..e56559e 100644
--- a/app/crawler/spider.py
+++ b/app/crawler/spider.py
@@ -1,5 +1,3 @@
-from urllib.parse import urlparse
-
 from scrapy import Request
 from scrapy.linkextractors import LinkExtractor
 from scrapy.spiders import CrawlSpider, Rule
@@ -11,18 +9,15 @@ class MenesrSpider(CrawlSpider):
     rules = (Rule(),)
     use_playwright = False
     allowed_url = None
+    first_real_url = None
     page_count = 0
     page_limit = 0
     depth_limit = 0
 
     def __init__(self, crawl_process: CrawlProcess, *a, **kw):
-        parsed_url = urlparse(crawl_process.config.url)
         self.use_playwright = crawl_process.config.parameters.use_playwright
-        if parsed_url.path:
-            self.allowed_url = parsed_url.path
         self.page_limit = crawl_process.config.parameters.limit
         self.depth_limit = crawl_process.config.parameters.depth
-        self.allowed_domains = [parsed_url.netloc]
        self.start_urls = [crawl_process.config.url]
         self.crawl_process = crawl_process
         super().__init__(*a, **kw)
@@ -30,18 +25,17 @@
 
     def start_requests(self):
         for url in self.start_urls:
+            meta = {
+                "depth": 0,  # Set the initial depth to 0
+            }
             if self.use_playwright:
-                yield Request(url, self.parse, meta={
-                    "depth": 0,  # Set the initial depth to 0
+                meta.update({
                     "playwright": True,
                     "playwright_page_methods": [
                         ("evaluate", 'window.scrollTo(0, document.body.scrollHeight)')
-                    ]
-                })
-            else:
-                yield Request(url, self.parse, meta={
-                    "depth": 0,  # Set the initial depth to 0
+                    ],
                 })
+            yield Request(url, self.parse, meta=meta)
 
     def parse(self, response, **kwargs):
         # Crawl the links in the response page and continue to crawl the next page

From 669b990518a8fd7ab320b07693161e3a654dfbfd Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 14:17:54 +0100
Subject: [PATCH 07/11] fix: correct tests (allowed domains are no longer set at initialisation)

---
 tests/tests_crawler/test_menesr.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/tests_crawler/test_menesr.py b/tests/tests_crawler/test_menesr.py
index b301d6c..5243f6e 100644
--- a/tests/tests_crawler/test_menesr.py
+++ b/tests/tests_crawler/test_menesr.py
@@ -20,7 +20,6 @@ def test_init_without_path(self):
         self.mock_crawl_process.config.url = "http://example.com/"
         spider = MenesrSpider(self.mock_crawl_process)
 
-        self.assertEqual(spider.allowed_domains, ["example.com"])
         self.assertEqual(spider.start_urls, ["http://example.com/"])
         self.assertTrue(isinstance(spider.rules, tuple))
 
     def test_init_with_path(self):
         spider = MenesrSpider(self.mock_crawl_process)
 
         # Checking initialized values
-        parsed_url = urlparse(self.mock_crawl_process.config.url)
-        self.assertEqual(spider.allowed_domains, [parsed_url.netloc])
         self.assertEqual(
             spider.start_urls, [self.mock_crawl_process.config.url]
         )

From b58e64b0bd0cf3435c4f0979de6beb7e69c745c9 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 15:00:57 +0100
Subject: [PATCH 08/11] MSPDT-80: Check all the task statuses in the finalize_crawl task

---
 app/celery_broker/tasks.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py
index c05ea53..06451fe 100644
--- a/app/celery_broker/tasks.py
+++ b/app/celery_broker/tasks.py
@@ -122,8 +122,23 @@ def finalize_crawl_process(self, crawl_process: Optional[CrawlProcess], crawl: C
     # Retrieve the current status of the crawl
     current_crawl = crawls.get(crawl_id=crawl.id)
 
+    have_success = False
+
+    # Retrieve the status of all the sub tasks (html_crawl, lighthouse, technologies, responsiveness, carbon_footprint)
+    # If one of the sub tasks failed, we consider the crawl as partially failed
+    all_tasks = [current_crawl.html_crawl, current_crawl.lighthouse, current_crawl.technologies_and_trackers, current_crawl.responsiveness, current_crawl.carbon_footprint]
+    for task in all_tasks:
+        if task is None:
+            continue
+        if task.status != ProcessStatus.SUCCESS:
+            current_crawl.status = ProcessStatus.PARTIAL_ERROR
+        else:
+            have_success = True
+
     if current_crawl.status == ProcessStatus.STARTED:
         current_crawl.status = ProcessStatus.SUCCESS
+    elif not have_success:
+        current_crawl.status = ProcessStatus.ERROR
 
     crawls.update_status(
         crawl_id=crawl.id, status=current_crawl.status, final_status=True

From 055fff6462040d1abe8f9a021709e1d1487c2160 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 15:10:06 +0100
Subject: [PATCH 09/11] fix: only calculate crawl status at the beginning and the end of the crawl

---
 app/celery_broker/metadata_utils.py | 6 ------
 app/celery_broker/tasks.py          | 8 +-------
 app/repositories/crawls.py          | 7 ++-----
 3 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/app/celery_broker/metadata_utils.py b/app/celery_broker/metadata_utils.py
index 477873c..44a31a1 100644
--- a/app/celery_broker/metadata_utils.py
+++ b/app/celery_broker/metadata_utils.py
@@ -87,9 +87,6 @@ def metadata_task(
                 task_name=metadata_type,
                 task=task,
             )
-            crawls.update_status(
-                crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR
-            )
             continue
         except Exception as e:
             logger.error(
@@ -102,8 +99,5 @@
                 task_name=metadata_type,
                 task=task,
             )
-            crawls.update_status(
-                crawl_id=crawl_process.id, status=ProcessStatus.PARTIAL_ERROR
-            )
             continue
     return handle_metadata_result(task, crawl_process, result, metadata_type)
diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py
index 06451fe..3f3dbb0 100644
--- a/app/celery_broker/tasks.py
+++ b/app/celery_broker/tasks.py
@@ -45,9 +45,6 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
     except Exception as e:
         logger.error(f"Error while crawling html files: {e}")
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)
-        crawls.update_status(
-            crawl_id=crawl.id, status=ProcessStatus.ERROR
-        )
         self.update_state(state='FAILURE')
         return crawl_process
     try:
@@ -57,9 +54,6 @@
         logger.error(f"Error while uploading html files: {e}")
         # Html crawl will be considered failed if we can't upload the html files
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)
-        crawls.update_status(
-            crawl_id=crawl.id, status=ProcessStatus.ERROR
-        )
         self.update_state(state='FAILURE')
         return crawl_process
 
@@ -141,7 +135,7 @@ def finalize_crawl_process(self, crawl_process: Optional[CrawlProcess], crawl: C
         current_crawl.status = ProcessStatus.ERROR
 
     crawls.update_status(
-        crawl_id=crawl.id, status=current_crawl.status, final_status=True
+        crawl_id=crawl.id, status=current_crawl.status
     )
 
     websites.store_last_crawl(
diff --git a/app/repositories/crawls.py b/app/repositories/crawls.py
index a9cf48d..5f795e1 100644
--- a/app/repositories/crawls.py
+++ b/app/repositories/crawls.py
@@ -63,14 +63,11 @@ def update(self, data: CrawlModel):
             },
         )
 
-    def update_status(self, crawl_id: str, status: ProcessStatus, final_status: bool = False):
+    def update_status(self, crawl_id: str, status: ProcessStatus):
         update_dict = {"status": status}
         if status == ProcessStatus.STARTED:
             update_dict["started_at"] = french_datetime()
-        elif status == ProcessStatus.SUCCESS:
-            update_dict["finished_at"] = french_datetime()
-        # In the finalize task, we should update the finished_at field regardless of the status
-        if final_status:
+        else:
             update_dict["finished_at"] = french_datetime()
         self.collection.update_one(
             filter={"id": crawl_id},

From 56a59f766d8ef1d879c5df8721e430971f975bd3 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 17:10:02 +0100
Subject: [PATCH 10/11] Revert "fix: stop creating sub-process for html crawl"

This reverts commit 2188a83e8abe4a306247d8c80fe29414b6c02aff.
---
 app/celery_broker/crawler_utils.py |  3 ++-
 app/celery_broker/tasks.py         | 10 +++++++++-
 docker-compose.yml                 |  2 --
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/app/celery_broker/crawler_utils.py b/app/celery_broker/crawler_utils.py
index 9f3059f..6b31e6d 100644
--- a/app/celery_broker/crawler_utils.py
+++ b/app/celery_broker/crawler_utils.py
@@ -33,10 +33,11 @@ def init_crawler_settings(crawl_process: CrawlProcess):
     return settings
 
 
-def start_crawler_process(crawl_process: CrawlProcess):
+def start_crawler_process(crawl_process: CrawlProcess, results: dict):
     process = CrawlerProcess(settings=init_crawler_settings(crawl_process))
     process.crawl(MenesrSpider, crawl_process=crawl_process)
     process.start()
+    results["metadata"] = dict(crawl_process.metadata.items())
 
 
 def set_html_crawl_status(crawl: CrawlModel, request_id: str, status: ProcessStatus):
diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py
index 3f3dbb0..498d91c 100644
--- a/app/celery_broker/tasks.py
+++ b/app/celery_broker/tasks.py
@@ -41,7 +41,15 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
     crawl_process = CrawlProcess.from_model(crawl)
 
     try:
-        start_crawler_process(crawl_process=crawl_process)
+        with Manager() as manager:
+            shared_dict = manager.dict()
+            p = Process(
+                target=start_crawler_process,
+                kwargs={"crawl_process": crawl_process, "results": shared_dict},
+            )
+            p.start()
+            p.join()  # TODO define and add a timeout
+            crawl_process.metadata.update(shared_dict["metadata"])
     except Exception as e:
         logger.error(f"Error while crawling html files: {e}")
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)
diff --git a/docker-compose.yml b/docker-compose.yml
index 88aa510..1d5cbc5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -32,8 +32,6 @@ services:
     build: .
     env_file: .env
     command: watchfiles --filter python 'celery -A celery_broker.main.celery_app worker -l info -P solo -n crawl_worker -Q crawl_queue,finalize_crawl_queue'
-    # An example with prefork.
-    #command: watchfiles --filter python 'celery -A celery_broker.main.celery_app worker -l info -P prefork -c 4 -n crawl_worker -Q crawl_queue,finalize_crawl_queue'
     volumes:
       - ./app:/open-crawler/app
       - local_files:${LOCAL_FILES_PATH}

From bc6ed5e2ffb26722000cdd1df52ed9cb825cddff Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 14 Dec 2023 17:32:50 +0100
Subject: [PATCH 11/11] MSPDT-75: Define timeout for html_crawl process

---
 app/celery_broker/tasks.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/app/celery_broker/tasks.py b/app/celery_broker/tasks.py
index 498d91c..d40b034 100644
--- a/app/celery_broker/tasks.py
+++ b/app/celery_broker/tasks.py
@@ -1,7 +1,9 @@
 # Standard library imports
+import asyncio
 import os
 import pathlib
 import shutil
+from multiprocessing import Manager, Process
 from typing import Optional
 
 # Local imports
@@ -48,8 +50,14 @@ def start_crawl_process(self, crawl: CrawlModel) -> CrawlProcess:
                 kwargs={"crawl_process": crawl_process, "results": shared_dict},
             )
             p.start()
-            p.join()  # TODO define and add a timeout
+            p.join(120)  # Wait 120 seconds for the crawler to finish
+            if p.is_alive():
+                logger.error("Crawler timed out, the crawl may not contain enough pages")
+                p.terminate()
+                p.join()
+
             crawl_process.metadata.update(shared_dict["metadata"])
+
     except Exception as e:
         logger.error(f"Error while crawling html files: {e}")
         set_html_crawl_status(crawl, self.request.id, ProcessStatus.ERROR)