From d5555d23cbcf6874818c9a1c4305278698915606 Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 16 Nov 2023 14:12:15 +0100
Subject: [PATCH 1/2] Remove unused import & Init database when we need it

---
Review note: `db` stays bound at import time (building the Database handle
does not connect to the server) and init_database() now only creates the
indexes. Rebinding `db` inside init_database() would leave the copy obtained
through `from app.mongo import db` in app/repositories/*.py set to None, so
`db[...]` in the repository constructors would raise TypeError.
The post-image blob id on the app/mongo.py `index` line predates this
revision of the hunk.

 app/api/factory.py                        |  2 --
 app/celery_broker/factory.py              |  2 --
 app/celery_broker/metadata_utils.py       |  2 --
 app/crawler/middlewares.py                |  1 -
 app/models/website.py                     |  1 -
 app/mongo.py                              | 27 +++++++++++++------
 app/repositories/crawls.py                |  4 ++--
 app/repositories/websites.py              |  4 ++--
 app/s3.py                                 |  1 -
 app/services/responsiveness_calculator.py |  1 -
 .../test_responsiveness_calculator.py     |  1 -
 11 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/app/api/factory.py b/app/api/factory.py
index 79e8d80..03ab650 100644
--- a/app/api/factory.py
+++ b/app/api/factory.py
@@ -1,5 +1,3 @@
-import os
-
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
diff --git a/app/celery_broker/factory.py b/app/celery_broker/factory.py
index df2ccbd..77fe520 100644
--- a/app/celery_broker/factory.py
+++ b/app/celery_broker/factory.py
@@ -1,5 +1,3 @@
-import os
-
 from celery import Celery
 
 from app.config import settings
diff --git a/app/celery_broker/metadata_utils.py b/app/celery_broker/metadata_utils.py
index 57d8b8b..89224b3 100644
--- a/app/celery_broker/metadata_utils.py
+++ b/app/celery_broker/metadata_utils.py
@@ -1,6 +1,4 @@
 import json
-import os
-import pathlib
 
 import app.repositories as repositories
 from app.models.enums import MetadataType, ProcessStatus
diff --git a/app/crawler/middlewares.py b/app/crawler/middlewares.py
index 8f1020c..2df014b 100644
--- a/app/crawler/middlewares.py
+++ b/app/crawler/middlewares.py
@@ -2,7 +2,6 @@
 #
 # See documentation in:
 # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-import os
 from pathlib import Path
 
 from app.config import settings
diff --git a/app/models/website.py b/app/models/website.py
index c024257..2675f1d 100644
--- a/app/models/website.py
+++ b/app/models/website.py
@@ -1,4 +1,3 @@
-import os
 from datetime import datetime, timedelta
 from typing import Optional, Any
 
diff --git a/app/mongo.py b/app/mongo.py
index 1a77f8f..9c9bc2a 100644
--- a/app/mongo.py
+++ b/app/mongo.py
@@ -1,16 +1,25 @@
-import os
-
 from app.config import settings
 from pymongo import MongoClient
 
 
 client = MongoClient(host=settings.MONGO_URI)
+# Building the Database handle performs no network I/O, so it stays at import
+# time: modules doing `from app.mongo import db` must receive a real handle.
 db = client[settings.MONGO_DBNAME]
+_indexes_created = False
+
 
-db[settings.MONGO_WEBSITES_COLLECTION].create_index(
-    [("id", 1)], unique=True
-)
-db[settings.MONGO_WEBSITES_COLLECTION].create_index(
-    [("url", 1)], unique=True
-)
-db[settings.MONGO_CRAWLS_COLLECTION].create_index([("id", 1)], unique=True)
+def init_database():
+    """Create the unique indexes on first call; later calls are no-ops."""
+    global _indexes_created
+    if _indexes_created:
+        return
+    db[settings.MONGO_WEBSITES_COLLECTION].create_index(
+        [("id", 1)], unique=True
+    )
+    db[settings.MONGO_WEBSITES_COLLECTION].create_index(
+        [("url", 1)], unique=True
+    )
+    db[settings.MONGO_CRAWLS_COLLECTION].create_index([("id", 1)], unique=True)
+    # Set the flag last so a failed attempt is retried on the next call.
+    _indexes_created = True
diff --git a/app/repositories/crawls.py b/app/repositories/crawls.py
index 905bb35..671e11e 100644
--- a/app/repositories/crawls.py
+++ b/app/repositories/crawls.py
@@ -1,4 +1,3 @@
-import os
 from pymongo.results import InsertOneResult
 
 from app.celery_broker.utils import french_datetime
@@ -6,13 +5,14 @@
 from app.models.crawl import CrawlModel, ListCrawlResponse
 from app.models.enums import ProcessStatus
 from app.models.metadata import MetadataTask
-from app.mongo import db
+from app.mongo import db, init_database
 
 
 class CrawlsRepository:
     """Operations for crawls collection"""
 
     def __init__(self):
+        init_database()
         self.collection = db[settings.MONGO_CRAWLS_COLLECTION]
 
     def create(self, data: CrawlModel) -> str:
diff --git a/app/repositories/websites.py b/app/repositories/websites.py
index 59f91d3..64d1e23 100644
--- a/app/repositories/websites.py
+++ b/app/repositories/websites.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any
 
 from pymongo.results import InsertOneResult, UpdateResult
@@ -8,13 +7,14 @@
 from app.models.enums import ProcessStatus
 from app.models.request import UpdateWebsiteRequest
 from app.models.website import WebsiteModel, ListWebsiteResponse
-from app.mongo import db
+from app.mongo import db, init_database
 
 
 class WebsitesRepository:
     """Operations for websites collection"""
 
     def __init__(self):
+        init_database()
         self.collection = db[settings.MONGO_WEBSITES_COLLECTION]
 
     def list(
diff --git a/app/s3.py b/app/s3.py
index e16d51d..6de044c 100644
--- a/app/s3.py
+++ b/app/s3.py
@@ -1,4 +1,3 @@
-import os
 from functools import wraps
 
 from minio import Minio
diff --git a/app/services/responsiveness_calculator.py b/app/services/responsiveness_calculator.py
index 4fcb744..3e72e7a 100644
--- a/app/services/responsiveness_calculator.py
+++ b/app/services/responsiveness_calculator.py
@@ -1,4 +1,3 @@
-import os
 from typing import Any
 
 import requests
diff --git a/tests/tests_services/test_responsiveness_calculator.py b/tests/tests_services/test_responsiveness_calculator.py
index 24e75ce..fe20272 100644
--- a/tests/tests_services/test_responsiveness_calculator.py
+++ b/tests/tests_services/test_responsiveness_calculator.py
@@ -1,4 +1,3 @@
-import os
 import unittest
 from unittest.mock import patch, Mock
 

From 55e81525808e5ebda9daab3bbf1a570c6ed9d64d Mon Sep 17 00:00:00 2001
From: Yilei Pan
Date: Thu, 16 Nov 2023 14:52:04 +0100
Subject: [PATCH 2/2] clean: remove unnecessary code in __init__.py

---
 app/repositories/__init__.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/app/repositories/__init__.py b/app/repositories/__init__.py
index 5e0f6b1..e69de29 100644
--- a/app/repositories/__init__.py
+++ b/app/repositories/__init__.py
@@ -1,5 +0,0 @@
-from .crawls import crawls
-from .websites import websites
-from .files import files
-
-__all__ = ["crawls", "websites", "files"]