Skip to content

Commit

Permalink
Fix/first unit restart (#110)
Browse files Browse the repository at this point in the history
* bump lib

* wait for unit in cluster before setting tls

* fix for single-unit restart

* removed unused import

* Add log + error handling
  • Loading branch information
paulomach authored Oct 25, 2022
1 parent c963c7d commit a245206
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
16 changes: 5 additions & 11 deletions lib/charms/mysql/v0/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def wait_until_mysql_connection(self) -> None:
import logging
import re
from abc import ABC, abstractmethod
from typing import Iterable, List, Optional, Set, Tuple
from typing import Iterable, List, Optional, Tuple

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random

Expand All @@ -83,7 +83,7 @@ def wait_until_mysql_connection(self) -> None:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 8
LIBPATCH = 9

UNIT_TEARDOWN_LOCKNAME = "unit-teardown"

Expand Down Expand Up @@ -1075,17 +1075,11 @@ def get_member_state(self) -> Tuple[str, str]:
# MEMBER_ROLE is empty if member is not in a group
return results[0], results[1] if len(results) == 2 else "unknown"

def reboot_from_complete_outage(self, instance_names: Set[str]) -> None:
"""Wrapper for reboot_cluster_from_complete_outage command.
Args:
instance_names: set of instance names (e.g. `juju-e3f183-4:3306`)
"""
options = {"rejoinInstances": list(instance_names)}

def reboot_from_complete_outage(self) -> None:
"""Wrapper for reboot_cluster_from_complete_outage command."""
rejoin_command = (
f"shell.connect('{self.cluster_admin_user}:{self.cluster_admin_password}@{self.instance_address}')",
f"dba.reboot_cluster_from_complete_outage('{self.cluster_name}', {json.dumps(options)} )",
f"dba.reboot_cluster_from_complete_outage('{self.cluster_name}')",
)

try:
Expand Down
15 changes: 14 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
MySQLConfigureMySQLUsersError,
MySQLCreateClusterError,
MySQLGetMySQLVersionError,
MySQLRebootFromCompleteOutageError,
)
from charms.rolling_ops.v0.rollingops import RollingOpsManager
from ops.charm import (
Expand Down Expand Up @@ -513,6 +514,17 @@ def _restart(self, _) -> None:
container = self.unit.get_container(CONTAINER_NAME)
container.restart(MYSQLD_SERVICE)

# when a restart is done right after cluster creation (e.g. bundles)
# or for single unit deployments, it's necessary to reboot the
# cluster from a complete outage to restore the unit as primary
if self.app_peer_data["units-added-to-cluster"] == "1":
try:
self._mysql.reboot_from_complete_outage()
except MySQLRebootFromCompleteOutageError:
logger.error("Failed to restart single node cluster")
self.unit.status = BlockedStatus("Failed to restart primary")
return

unit_label = self.unit.name.replace("/", "-")

try:
Expand All @@ -523,10 +535,11 @@ def _restart(self, _) -> None:
# `self.active_status_message` once it gets merged
self.unit.status = ActiveStatus()
return
logger.debug("Restarted instance not yet in cluster")
raise Exception
except RetryError:
logger.error("Unable to rejoin mysqld instance to the cluster.")
self.unit.status = BlockedStatus("Restarted node unable to rejoin the cluster")
self.unit.status = BlockedStatus("Restarted instance unable to rejoin the cluster")


if __name__ == "__main__":
Expand Down
5 changes: 5 additions & 0 deletions src/relations/mysql_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ def _on_tls_relation_joined(self, _) -> None:

def _on_certificate_available(self, event: CertificateAvailableEvent) -> None:
"""Enable TLS when TLS certificate available."""
if not self.charm.unit_initialized:
logger.debug("Wait unit initialise before request certificate.")
event.defer()
return

if (
event.certificate_signing_request.strip()
!= self.charm.get_secret(SCOPE, "csr").strip()
Expand Down

0 comments on commit a245206

Please sign in to comment.