Skip to content

Commit

Permalink
Refactor code for consistency and remove unused imports
Browse files Browse the repository at this point in the history
Standardized string quotes from single to double, improved code formatting for better readability, and removed the unused `copyfile` and `rmtree` imports. Additionally, re-organized imports for better structure and consistency across the `create_blast_db.py` and `webhook_server.py` scripts.
  • Loading branch information
nuin committed Oct 1, 2024
1 parent 84929e0 commit 7ce1aae
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 49 deletions.
43 changes: 22 additions & 21 deletions src/create_blast_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import time
from datetime import datetime
from pathlib import Path
from shutil import copyfile, rmtree
from typing import Dict, List, Optional, Tuple

import click
Expand All @@ -31,8 +30,8 @@
from rich.table import Table

from utils import (check_md5sum, check_output, edit_fasta, get_ftp_file_size,
get_mod_from_json, needs_parse_id, run_command, s3_sync,
setup_logger, slack_message, get_https_file_size)
get_https_file_size, get_mod_from_json, needs_parse_id,
run_command, s3_sync, setup_logger, slack_message)

# Load environment variables
load_dotenv()
Expand Down Expand Up @@ -129,7 +128,7 @@ def get_files_ftp(fasta_uri: str, md5sum: str) -> bool:
return False

try:
if fasta_uri.startswith('https'):
if fasta_uri.startswith("https"):
file_size = get_https_file_size(fasta_uri)
else:
file_size = get_ftp_file_size(fasta_uri)
Expand Down Expand Up @@ -334,25 +333,28 @@ def derive_mod_from_input(input_file):
str: The MOD (Model Organism) extracted from the input file name. If the input file name does not have the expected format or the MOD cannot be extracted, returns 'Unknown'.
"""
file_name = Path(input_file).name
parts = file_name.split('.')
if len(parts) >= 3 and parts[0] == 'databases':
parts = file_name.split(".")
if len(parts) >= 3 and parts[0] == "databases":
return parts[1] # This should be the MOD
return 'Unknown'

return "Unknown"


@click.command()
@click.option("-g", "--config_yaml", help="YAML file with all MODs configuration")
@click.option("-j", "--input_json", help="JSON file input coordinates")
@click.option("-e", "--environment", help="Environment", default="dev")
@click.option("-m", "--mod", help="Model organism")
@click.option("-s", "--skip_efs_sync", help="Skip EFS sync", is_flag=True, default=False)
@click.option(
"-s", "--skip_efs_sync", help="Skip EFS sync", is_flag=True, default=False
)
@click.option("-u", "--update-slack", help="Update Slack", is_flag=True, default=False)
@click.option("-s3", "--sync-s3", help="Sync to S3", is_flag=True, default=False)
def create_dbs(config_yaml, input_json, environment, mod, skip_efs_sync, update_slack, sync_s3):
def create_dbs(
config_yaml, input_json, environment, mod, skip_efs_sync, update_slack, sync_s3
):
"""
A command line interface function that creates BLAST databases based on the provided configuration.
Parameters:
- config_yaml (str): YAML file with all MODs configuration.
- input_json (str): JSON file input coordinates.
Expand All @@ -361,7 +363,7 @@ def create_dbs(config_yaml, input_json, environment, mod, skip_efs_sync, update_
- skip_efs_sync (bool): Skip EFS sync. Default is False.
- update_slack (bool): Update Slack. Default is False.
- sync_s3 (bool): Sync to S3. Default is False.
Returns:
None
"""
Expand All @@ -373,16 +375,14 @@ def create_dbs(config_yaml, input_json, environment, mod, skip_efs_sync, update_
if mod is None:
mod = derive_mod_from_input(input_json or config_yaml)

db_info = {
"mod": mod,
"environment": environment,
"databases_created": []
}
db_info = {"mod": mod, "environment": environment, "databases_created": []}

if config_yaml:
success = process_yaml(Path(config_yaml), db_info)
elif input_json:
success = process_json(Path(input_json), environment, db_info['mod'], db_info)
success = process_json(
Path(input_json), environment, db_info["mod"], db_info
)
else:
LOGGER.error("Neither config_yaml nor input_json provided")
return
Expand All @@ -395,10 +395,12 @@ def create_dbs(config_yaml, input_json, environment, mod, skip_efs_sync, update_
message = f"*MOD:* {db_info['mod']}\n"
message += f"*Environment:* {db_info['environment']}\n"
message += f"*Databases created:*\n"
for db in db_info['databases_created']:
for db in db_info["databases_created"]:
message += f"• *{db['name']}* (Type: `{db['type']}`, Taxon ID: `{db['taxon_id']}`)\n"

slack_success = slack_message([{"text": message}], subject="BLAST Database Update")
slack_success = slack_message(
[{"text": message}], subject="BLAST Database Update"
)
LOGGER.info(f"Slack update {'successful' if slack_success else 'failed'}")

if sync_s3:
Expand All @@ -413,6 +415,5 @@ def create_dbs(config_yaml, input_json, environment, mod, skip_efs_sync, update_
LOGGER.info(f"create_dbs function completed in {duration:.2f} seconds")



if __name__ == "__main__":
create_dbs()
60 changes: 32 additions & 28 deletions src/webhook_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,33 @@
from pathlib import Path

import yaml
from flask import Flask, request, abort
from dotenv import load_dotenv, find_dotenv
from dotenv import find_dotenv, load_dotenv
from flask import Flask, abort, request

# Load environment variables from .env file in src directory
load_dotenv(find_dotenv(filename='src/.env'))
load_dotenv(find_dotenv(filename="src/.env"))

# Ensure logs directory exists
log_file = os.getenv('LOG_FILE', 'logs/webhook.log')
log_file = os.getenv("LOG_FILE", "logs/webhook.log")
log_dir = os.path.dirname(log_file)
Path(log_dir).mkdir(parents=True, exist_ok=True)

# Set up logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
filename=log_file)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
filename=log_file,
)
logger = logging.getLogger(__name__)

app = Flask(__name__)


def verify_signature(payload_body, secret_token, signature):
    """Verify that the payload was sent from GitHub by validating SHA256.

    Args:
        payload_body: Raw request body as bytes, exactly as received.
        secret_token: Shared webhook secret, as ``str`` or ``bytes``.
        signature: Value of the ``X-Hub-Signature-256`` header, expected in
            the form ``sha256=<hexdigest>``.

    Returns:
        True when ``signature`` equals the HMAC-SHA256 of ``payload_body``
        keyed with ``secret_token``; False otherwise, including when the
        signature is missing or empty.
    """
    if not signature:
        return False

    key = secret_token.encode() if isinstance(secret_token, str) else secret_token
    expected_signature = hmac.new(key, payload_body, hashlib.sha256).hexdigest()

    # Compare as bytes: hmac.compare_digest raises TypeError when given a str
    # containing non-ASCII characters, which a crafted header could trigger.
    expected = f"sha256={expected_signature}".encode()
    provided = signature.encode() if isinstance(signature, str) else signature
    return hmac.compare_digest(expected, provided)


Expand All @@ -40,7 +44,7 @@ def run_script(script_path, config_path):
["python", script_path, "--input_config", config_path],
check=True,
capture_output=True,
text=True
text=True,
)
logger.info(f"Script output: {result.stdout}")
except subprocess.CalledProcessError as e:
Expand All @@ -51,46 +55,46 @@ def run_script(script_path, config_path):
def get_config_files(repo_path):
    """Get all JSON and YAML config files in the repository.

    Args:
        repo_path: Root directory to search recursively (str or path-like).

    Returns:
        A list of ``Path`` objects for every regular file under ``repo_path``
        whose suffix is ``.json``, ``.yaml`` or ``.yml`` (compared
        case-insensitively). Results are grouped in that extension order.
    """
    extensions = (".json", ".yaml", ".yml")
    found = {ext: [] for ext in extensions}

    # Walk the tree once instead of once per extension. The suffix is
    # lowercased so that files such as "CONFIG.JSON" are picked up, matching
    # the case-insensitive suffix handling in process_config_file.
    for path in Path(repo_path).rglob("*"):
        ext = path.suffix.lower()
        # Skip directories that merely happen to be named like a config file.
        if ext in found and path.is_file():
            found[ext].append(path)

    return [path for ext in extensions for path in found[ext]]


def process_config_file(config_file, script_path):
    """Process a single configuration file.

    Loads the file as JSON or YAML (chosen by its lowercased suffix) and, when
    the parsed document is a mapping with a truthy ``input_json`` entry, hands
    the config file to ``run_script``. Unsupported suffixes and configs without
    ``input_json`` are logged as warnings and skipped.

    Args:
        config_file: ``pathlib.Path`` of the configuration file.
        script_path: Path of the script forwarded to ``run_script``.
    """
    file_ext = config_file.suffix.lower()

    if file_ext == ".json":
        loader = json.load
    elif file_ext in (".yaml", ".yml"):
        loader = yaml.safe_load
    else:
        logger.warning(f"Unsupported file type: {file_ext}")
        return

    with open(config_file, "r", encoding="utf-8") as f:
        config_data = loader(f)

    # An empty YAML file parses to None and a top-level list has no .get();
    # treat anything that is not a mapping as "no input_json" instead of
    # raising AttributeError.
    input_json = (
        config_data.get("input_json") if isinstance(config_data, dict) else None
    )

    if input_json:
        run_script(script_path, str(config_file))
    else:
        logger.warning(f"No input_json found in {config_file}")


@app.route('/webhook', methods=['POST'])
@app.route("/webhook", methods=["POST"])
def webhook():
# Verify GitHub webhook signature
signature = request.headers.get('X-Hub-Signature-256')
signature = request.headers.get("X-Hub-Signature-256")
if not signature:
logger.warning("No signature provided")
abort(400, "No signature provided")

github_secret = os.getenv('GITHUB_WEBHOOK_SECRET')
github_secret = os.getenv("GITHUB_WEBHOOK_SECRET")
if not github_secret:
logger.error("GitHub webhook secret not configured")
abort(500, "Server configuration error")
Expand All @@ -103,19 +107,19 @@ def webhook():
payload = request.json

# Check if it's a push event
if request.headers.get('X-GitHub-Event') != 'push':
if request.headers.get("X-GitHub-Event") != "push":
logger.info(f"Received non-push event: {request.headers.get('X-GitHub-Event')}")
return "OK", 200

# Get the repository details
repo_name = payload['repository']['full_name']
branch = payload['ref'].split('/')[-1]
repo_name = payload["repository"]["full_name"]
branch = payload["ref"].split("/")[-1]

logger.info(f"Received push event for {repo_name} on branch {branch}")

try:
# Update the local repository
repo_path = os.getenv('REPO_LOCAL_PATH')
repo_path = os.getenv("REPO_LOCAL_PATH")
if not repo_path:
logger.error("Repository local path not configured")
abort(500, "Server configuration error")
Expand All @@ -124,7 +128,7 @@ def webhook():
logger.info(f"Successfully pulled latest changes for {repo_name}")

# Get the script path
script_path = os.getenv('SCRIPT_PATH')
script_path = os.getenv("SCRIPT_PATH")
if not script_path:
logger.error("Script path not configured")
abort(500, "Server configuration error")
Expand All @@ -140,5 +144,5 @@ def webhook():
abort(500, "Error processing webhook")


if __name__ == '__main__':
app.run(host='0.0.0.0', port=int(os.getenv('PORT', 5000)))
if __name__ == "__main__":
app.run(host="0.0.0.0", port=int(os.getenv("PORT", 5000)))

0 comments on commit 7ce1aae

Please sign in to comment.