diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index 3c505f4c..838bb398 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -47,6 +47,8 @@ Defines parameters for the API: specified in seconds. Defaults to 300. - commit_confirmed_wait: Time to wait between committing configuration and checking that the device is still reachable, specified in seconds. Defaults to 1. +- napalm_timeout: Timeout for NAPALM operations, specified in seconds. Defaults to 60. + Increase if you get errors like "jnpr.junos.exception.RpcTimeoutError: RpcTimeoutError" on jobs. /etc/cnaas-nms/auth_config.yml ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/cnaas_nms/app_settings.py b/src/cnaas_nms/app_settings.py index f1242161..5ad993c7 100644 --- a/src/cnaas_nms/app_settings.py +++ b/src/cnaas_nms/app_settings.py @@ -57,6 +57,7 @@ class ApiSettings(BaseSettings): COMMIT_CONFIRMED_TIMEOUT: int = 300 COMMIT_CONFIRMED_WAIT: int = 1 SETTINGS_OVERRIDE: Optional[dict] = None + NAPALM_TIMEOUT: int = 60 @field_validator("MGMTDOMAIN_PRIMARY_IP_VERSION") @classmethod @@ -118,6 +119,7 @@ def construct_api_settings() -> ApiSettings: COMMIT_CONFIRMED_TIMEOUT=config.get("commit_confirmed_timeout", 300), COMMIT_CONFIRMED_WAIT=config.get("commit_confirmed_wait", 1), SETTINGS_OVERRIDE=config.get("settings_override", None), + NAPALM_TIMEOUT=config.get("napalm_timeout", 60), ) else: return ApiSettings() diff --git a/src/cnaas_nms/db/device.py b/src/cnaas_nms/db/device.py index 17310d63..a8f4d523 100644 --- a/src/cnaas_nms/db/device.py +++ b/src/cnaas_nms/db/device.py @@ -7,6 +7,7 @@ import re from typing import List, Optional, Set, Tuple +from nornir.core.inventory import Group as NornirGroup from sqlalchemy import Boolean, DateTime, Enum, ForeignKey, Integer, String, Unicode, UniqueConstraint, event from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.sql import func @@ -503,6 +504,16 @@ def validate(cls, new_entry=True, **kwargs): 
@classmethod
def nornir_groups_to_devicetype(cls, groups: List[NornirGroup]) -> DeviceType:
    """Derive the DeviceType from a nornir host's groups (task.host.groups).

    The first group whose name starts with "T_" decides the type: the prefix
    is stripped and the remainder is looked up by name in DeviceType,
    e.g. group name "T_DIST" -> DeviceType.DIST.

    Args:
        groups: Nornir groups the host belongs to.

    Returns:
        The DeviceType named by the first "T_" group, or DeviceType.UNKNOWN
        when no group name carries the "T_" prefix.

    Raises:
        KeyError: If the text after "T_" is not the name of a DeviceType member.
    """
    # Giving next() a default keeps an exhausted iterator from leaking
    # StopIteration out of this method when no "T_" group is present.
    type_group = next((group for group in groups if group.name.startswith("T_")), None)
    if type_group is None:
        return DeviceType.UNKNOWN
    return DeviceType[type_group.name[2:]]
def get_template_repo_path(hostname: str):
    """Return the local templates repository path to use for a device.

    The default is app_settings.TEMPLATES_LOCAL. It is overridden only when
    the device has a primary group, that group has a templates branch, and a
    worktree path is found for that branch.

    Args:
        hostname: Hostname used to look up the device's primary group.

    Returns:
        The worktree path of the primary group's templates branch when one is
        found, otherwise app_settings.TEMPLATES_LOCAL.
    """
    default_path = app_settings.TEMPLATES_LOCAL

    group = get_device_primary_groups().get(hostname)
    if not group:
        return default_path

    branch = get_group_templates_branch(group)
    if not branch:
        return default_path

    # Fall back to the default checkout when no worktree exists for the branch.
    worktree_path = find_templates_worktree_path(branch)
    return worktree_path if worktree_path else default_path
updated_groups.update(get_groups_using_branch(subdir, group_settings)) + + # find all devices that are using these branches and mark them as unsynchronized + updated_hostnames: Set[str] = set() + with sqla_session() as session: + for hostname, primary_group in device_primary_groups.items(): + if hostname in updated_hostnames: + continue + if primary_group in updated_groups: + dev: Device = session.query(Device).filter_by(hostname=hostname).one_or_none() + if dev: + dev.synchronized = False + add_sync_event(hostname, "refresh_templates", commit_by, job_id) + updated_hostnames.add(hostname) + if updated_hostnames: + logger.debug( + "Devices marked as unsynchronized because git worktree branches were refreshed: {}".format( + ", ".join(updated_hostnames) + ) + ) local_repo = Repo(app_settings.TEMPLATES_LOCAL) local_repo.git.worktree("prune") diff --git a/src/cnaas_nms/db/groups.py b/src/cnaas_nms/db/groups.py new file mode 100644 index 00000000..47945d00 --- /dev/null +++ b/src/cnaas_nms/db/groups.py @@ -0,0 +1,14 @@ +from typing import List + +# TODO: move all group related things here from settings +# make new settings_helper.py with (verify_dir_structure etc) and separate settings_groups for get_settings groups? 
def get_groups_using_branch(branch_name: str, group_settings: dict) -> List[str]:
    """Return the names of the groups whose settings select a templates branch.

    Args:
        branch_name: Branch name to look for.
        group_settings: Mapping of group name to that group's settings; the
            "templates_branch" entry of each group's settings is compared.

    Returns:
        Group names, in mapping order, whose "templates_branch" equals
        branch_name. A group whose settings are None never matches, and an
        empty or None mapping yields an empty list.
    """
    if not group_settings:
        return []
    return [
        group_name
        for group_name, group_data in group_settings.items()
        # A group declared without any settings has no branch to compare.
        if group_data is not None and group_data.get("templates_branch") == branch_name
    ]


def get_unmanaged_config_sections(hostname: str, platform: str, devtype: DeviceType) -> List[str]:
    """Return the unmanaged config sections listed for a device type.

    Reads <template repo>/<platform>/mapping.yml, where the template repo is
    the one resolved for the hostname, and looks up the
    "unmanaged_config_sections" entry under the device type's name.

    Args:
        hostname: Device hostname, used to resolve the template repo path.
        platform: Platform directory inside the template repo.
        devtype: Device type whose name is the top-level key in mapping.yml.

    Returns:
        The "unmanaged_config_sections" list for the device type, or an empty
        list when the file is empty, the device type is not present, or the
        entry is missing or not a list.

    Raises:
        RepoStructureException: If mapping.yml does not exist for the platform.
    """
    local_repo_path = get_template_repo_path(hostname)

    mapfile = os.path.join(local_repo_path, platform, "mapping.yml")
    if not os.path.isfile(mapfile):
        raise RepoStructureException("File {} not found in template repo".format(mapfile))
    # Decode explicitly so the result does not depend on the process locale.
    with open(mapfile, "r", encoding="utf-8") as f:
        mapping = yaml.safe_load(f)

    # An empty mapping.yml loads as None, and a device type may be absent from
    # the mapping; both simply mean there is nothing unmanaged to report.
    devtype_mapping = mapping.get(devtype.name) if isinstance(mapping, dict) else None
    if not isinstance(devtype_mapping, dict):
        return []
    sections = devtype_mapping.get("unmanaged_config_sections")
    return sections if isinstance(sections, list) else []
a/src/cnaas_nms/devicehandler/nornir_plugins/cnaas_inventory.py +++ b/src/cnaas_nms/devicehandler/nornir_plugins/cnaas_inventory.py @@ -3,7 +3,7 @@ from nornir.core.inventory import ConnectionOptions, Defaults, Group, Groups, Host, Hosts, Inventory, ParentGroups import cnaas_nms.db.session -from cnaas_nms.app_settings import app_settings +from cnaas_nms.app_settings import api_settings, app_settings from cnaas_nms.db.device import Device, DeviceState, DeviceType from cnaas_nms.db.settings import get_groups from cnaas_nms.tools.pki import ssl_context @@ -41,11 +41,12 @@ def load(self) -> Inventory: connection_options={ "napalm": ConnectionOptions( extras={ + "timeout": api_settings.NAPALM_TIMEOUT, "optional_args": { # args to eAPI HttpsEapiConnection for EOS "enforce_verification": True, "context": ssl_context, - } + }, } ), "netmiko": ConnectionOptions(extras={}), diff --git a/src/cnaas_nms/devicehandler/sync_devices.py b/src/cnaas_nms/devicehandler/sync_devices.py index 0c44729d..98e740d3 100644 --- a/src/cnaas_nms/devicehandler/sync_devices.py +++ b/src/cnaas_nms/devicehandler/sync_devices.py @@ -1,6 +1,5 @@ import os import time -from hashlib import sha256 from ipaddress import IPv4Address, IPv4Interface, ip_interface from typing import Any, List, Optional, Tuple @@ -14,16 +13,15 @@ from nornir_utils.plugins.functions import print_result import cnaas_nms.db.helper -from cnaas_nms.app_settings import api_settings, app_settings +from cnaas_nms.app_settings import api_settings from cnaas_nms.db.device import Device, DeviceState, DeviceType from cnaas_nms.db.device_vars import expand_interface_settings -from cnaas_nms.db.git import RepoStructureException -from cnaas_nms.db.git_worktrees import find_templates_worktree_path +from cnaas_nms.db.git import RepoStructureException, get_template_repo_path from cnaas_nms.db.interface import Interface from cnaas_nms.db.job import Job from cnaas_nms.db.joblock import Joblock, JoblockError from cnaas_nms.db.session import 
redis_session, sqla_session -from cnaas_nms.db.settings import get_device_primary_groups, get_group_templates_branch, get_settings +from cnaas_nms.db.settings import get_settings from cnaas_nms.devicehandler.changescore import calculate_score from cnaas_nms.devicehandler.get import calc_config_hash from cnaas_nms.devicehandler.nornir_helper import NornirJobResult, cnaas_init, get_jinja_env, inventory_selector @@ -107,7 +105,7 @@ def get_mlag_vars(session, dev: Device) -> dict: def populate_device_vars( - session, dev: Device, ztp_hostname: Optional[str] = None, ztp_devtype: Optional[DeviceType] = None + task, session, dev: Device, ztp_hostname: Optional[str] = None, ztp_devtype: Optional[DeviceType] = None ): logger = get_logger() device_variables: dict[str, Any] = { @@ -362,6 +360,29 @@ def populate_device_vars( ) device_variables = {**device_variables, **fabric_device_variables} + # if platform/devtype has unmanaged config sections, get running_config and add to device_variables + local_repo_path = get_template_repo_path(hostname) + mapfile = os.path.join(local_repo_path, dev.platform, "mapping.yml") + if not os.path.isfile(mapfile): + raise RepoStructureException("File {} not found in template repo".format(mapfile)) + with open(mapfile, "r") as f: + mapping = yaml.safe_load(f) + if ( + "unmanaged_config_sections" in mapping[devtype.name] + and type(mapping[devtype.name]["unmanaged_config_sections"]) is list + ): + task.host.open_connection("napalm", configuration=task.nornir.config) + res = task.run(task=napalm_get, getters=["config"]) + task.host.close_connection("napalm") + + running_config = dict(res.result)["config"]["running"] + # Remove the first task result, which is the napalm_get result, since it's not needed for final job result + del task.results[0] + if running_config is None: + raise Exception(f"Failed to get running configuration for {dev.hostname}") + + device_variables["running_config"] = running_config + # Add all environment variables starting 
with TEMPLATE_SECRET_ to # the list of configuration variables. The idea is to store secret # configuration outside of the templates repository. @@ -515,20 +536,11 @@ def push_sync_device( hostname = task.host.name with sqla_session() as session: # type: ignore dev: Device = session.query(Device).filter(Device.hostname == hostname).one() - template_vars = populate_device_vars(session, dev) + template_vars = populate_device_vars(task, session, dev) platform = dev.platform devtype = dev.device_type - local_repo_path = app_settings.TEMPLATES_LOCAL - - # override template path if primary group template path is set - primary_group = get_device_primary_groups().get(hostname) - if primary_group: - templates_branch = get_group_templates_branch(primary_group) - if templates_branch: - primary_group_template_path = find_templates_worktree_path(templates_branch) - if primary_group_template_path: - local_repo_path = primary_group_template_path + local_repo_path = get_template_repo_path(hostname) mapfile = os.path.join(local_repo_path, str(platform), "mapping.yml") if not os.path.isfile(mapfile): @@ -661,6 +673,7 @@ def sync_check_hash(task, force=False, job_id=None): task: Nornir task force: Ignore device hash """ + logger = get_logger() set_thread_data(job_id) if force is True: return @@ -673,11 +686,12 @@ def sync_check_hash(task, force=False, job_id=None): res = task.run(task=napalm_get, getters=["config"]) task.host.close_connection("napalm") - running_config = dict(res.result)["config"]["running"].encode() - if running_config is None: - raise Exception("Failed to get running configuration") - hash_obj = sha256(running_config) - running_hash = hash_obj.hexdigest() + try: + devtype = Device.nornir_groups_to_devicetype(task.host.groups) + except Exception as e: + logger.error("Unable to determine device type") + logger.exception(e) + running_hash = calc_config_hash(task.host.name, dict(res.result)["config"]["running"], task.host.platform, devtype) if stored_hash != 
def parse_git_changed_files(
    diff: List[git.remote.FetchInfo], prev_commit: str, local_repo: Repo
) -> Tuple[str, Set[str]]:
    """Summarise pulled commits and collect the files they changed.

    Only fetch results whose remote head matches the name of the ref that
    local_repo's HEAD points to are considered; results for other branches
    are skipped.

    Args:
        diff: Fetch results returned by a pull on local_repo's remote.
        prev_commit: Commit hash the repository was at before the pull. Each
            considered fetch result is diffed against the previous one,
            starting from this hash.
        local_repo: Repository the pull was performed in.

    Returns:
        A tuple (message, changed_files): message holds one
        "Commit ... by ... at ..." line per considered fetch result, and
        changed_files is the set of paths reported by the name-only diffs.
    """
    commit_lines: List[str] = []
    changed_files: Set[str] = set()
    for item in diff:
        if item.ref.remote_head != local_repo.head.ref.name:  # type: ignore[attr-defined]
            continue

        commit = item.commit
        commit_lines.append(
            "Commit {} by {} at {}\n".format(commit.name_rev, commit.committer, commit.committed_datetime)
        )
        rev_range = "{}..{}".format(prev_commit, commit.hexsha)
        name_only_output = local_repo.git.diff(rev_range, name_only=True)
        # The name-only output carries one path per line; splitting on any
        # whitespace would break a path containing spaces into fragments.
        changed_files.update(path for path in name_only_output.splitlines() if path)
        prev_commit = commit.hexsha
    return "".join(commit_lines), changed_files
@template_filter()
def get_config_section(config: str, section: str, parser: str) -> str:
    r"""Return the configuration block(s) matching a section pattern.

    The config text is parsed with the netutils parser selected by ``parser``
    and searched with ``find_all_children(section, match_type="regex")``.

    Args:
        config: Configuration text to parse and search.
        section: Section to retrieve, matched as a regular expression,
            e.g. "^(firewall)\s*\{".
        parser: Config dialect, case-insensitive: junos, eos, nxos, iosxr or
            ios. Any other value falls back to BaseSpaceConfigParser.

    Returns:
        The single matched entry when there is exactly one; all matched
        entries joined with newlines when there are several (followed by a
        closing "}" line for junos); an empty string when nothing matches.

    Example:
        get_config_section(config=firewall_config, section="firewall", parser="junos")
    """
    parser_classes: dict[str, type[BaseSpaceConfigParser]] = {
        "junos": JunosConfigParser,
        "eos": EOSConfigParser,
        "nxos": NXOSConfigParser,
        "iosxr": IOSXRConfigParser,
        "ios": IOSConfigParser,
    }
    parser_cls = parser_classes.get(parser.lower(), BaseSpaceConfigParser)

    config_parser = parser_cls(config)
    config_parser.build_config_relationship()
    children = config_parser.find_all_children(section, match_type="regex")

    if not children:
        return ""
    if len(children) == 1:
        return children[0]

    collected = "\n".join(children)
    # NOTE(review): the trailing "}" presumably closes the brace-delimited
    # junos block that the joined children leave open; confirm against netutils.
    if isinstance(config_parser, JunosConfigParser):
        return collected + "\n}"
    return collected