Commit
fix(html_crawl): Catch html_crawl errors
folland87 committed Feb 19, 2024
1 parent 0381cb7 commit 67ed884
Showing 6 changed files with 19 additions and 206 deletions.
29 changes: 19 additions & 10 deletions app/tasks/html_crawl/main.py
```diff
@@ -40,14 +40,23 @@ def get_html_crawl(self, crawl_id):
 
     # We start the crawl in a separate process so each
     # crawl creates its own Twisted reactor
-    process = Process(
-        target=start_crawl,
-        kwargs={"html_crawl": html_crawl, "url": crawl.url, "crawl_id": crawl.id})
-    process.start()
-    process.join(120)  # Wait 120 seconds for the crawler to finish
-    if process.is_alive():
-        logger.error(
-            "Crawler timed out, the crawl may not contain enough pages")
-        process.terminate()
-        process.join()
+    try:
+        process = Process(
+            target=start_crawl,
+            kwargs={"html_crawl": html_crawl,
+                    "url": crawl.url, "crawl_id": crawl.id}
+        )
+        process.start()
+        process.join(180)  # Wait 180 seconds for the crawler to finish
+        if process.is_alive():
+            logger.error(
+                "Crawler timed out, the crawl may not contain enough pages")
+            process.terminate()
+            process.join()
+
+    except Exception as e:
+        logger.error(f"Error while crawling html files: {e}")
+        html_crawl.update(status=ProcessStatus.ERROR, task_id=self.request.id)
+        crawls.update_task(crawl_id=crawl.id,
+                           task_name="html_crawl", task=html_crawl)
+        return
```
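
The change above follows a common pattern: run each crawl in its own child process (so it gets a fresh Twisted reactor), join with a timeout, terminate the child if it is still alive, and catch any exception so the crawl is recorded as failed instead of crashing the worker. Below is a minimal, self-contained sketch of that pattern using only the standard library; `start_crawl`, `mark_crawl_error`, `run_html_crawl`, and `CRAWL_TIMEOUT_SECONDS` are simplified placeholders, not the repository's actual `html_crawl.update` / `crawls.update_task` API.

```python
import logging
from multiprocessing import Process

logger = logging.getLogger(__name__)

CRAWL_TIMEOUT_SECONDS = 180  # mirrors the process.join(180) in the commit


def start_crawl(url: str, crawl_id: str) -> None:
    """Placeholder for the real crawler entry point (the repo runs Scrapy here,
    in its own process so each crawl gets a fresh Twisted reactor)."""
    print(f"crawling {url} (crawl {crawl_id})")


def mark_crawl_error(crawl_id: str, reason: str) -> None:
    """Placeholder for persisting the ERROR status (the repo updates the
    html_crawl record and the crawl's task entry instead)."""
    logger.error("crawl %s failed: %s", crawl_id, reason)


def run_html_crawl(url: str, crawl_id: str) -> None:
    try:
        process = Process(target=start_crawl,
                          kwargs={"url": url, "crawl_id": crawl_id})
        process.start()
        process.join(CRAWL_TIMEOUT_SECONDS)  # wait up to the timeout
        if process.is_alive():
            # Timed out: kill the child so the worker is not blocked forever.
            logger.error(
                "Crawler timed out, the crawl may not contain enough pages")
            process.terminate()
            process.join()
    except Exception as e:
        # Any failure while spawning or joining the child marks the crawl as
        # errored instead of letting the exception crash the task.
        mark_crawl_error(crawl_id, str(e))


if __name__ == "__main__":
    run_html_crawl("https://example.com", "crawl-123")
```

The broad `except Exception` mirrors the commit's intent: keep the worker alive and persist an ERROR status rather than letting the task raise.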
Empty file removed tests/tests_services/__init__.py
Empty file.
67 changes: 0 additions & 67 deletions tests/tests_services/test_carbon_calculator.py

This file was deleted.

33 changes: 0 additions & 33 deletions tests/tests_services/test_lighthouse_calculator.py

This file was deleted.

68 changes: 0 additions & 68 deletions tests/tests_services/test_technologies_calculator.py

This file was deleted.

28 changes: 0 additions & 28 deletions tests/tests_services/test_url_cleaner.py

This file was deleted.
