Skip to content

Commit

Permalink
Cleans up files and stops k8s-dqlite on remove hook
Browse files Browse the repository at this point in the history
cfg.Datastore.GetType() may return an empty string if the bootstrap
action failed before database.SetClusterConfig has been called. Because
of this, we're not removing the state dir for k8s-dqlite, which will be
wrongfully removed by setup.K8sDqlite on the next bootstrap attempt.

We're now opportunistically cleaning up the k8s-dqlite related state
directory.

Additionally, the remove hook was calling the setup.Ensure*PKI functions,
which ensure that the PKI files exist instead of removing them, contrary to
what the log messages suggest. Those calls are now replaced with functions
that actually remove the PKI files.

If a bootstrap attempt fails, the k8s-dqlite service will still be
running, which will cause the next bootstrap attempt to fail because the
k8s-dqlite port is still in use. The remove hook now stops the k8s-dqlite
service as well.
  • Loading branch information
claudiubelu committed Dec 16, 2024
1 parent 0cdeb87 commit 8b30ce4
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 8 deletions.
20 changes: 12 additions & 8 deletions src/k8s/pkg/k8sd/app/hooks_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (

apiv1_annotations "github.com/canonical/k8s-snap-api/api/v1/annotations"
databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util"
"github.com/canonical/k8s/pkg/k8sd/pki"
"github.com/canonical/k8s/pkg/k8sd/setup"
"github.com/canonical/k8s/pkg/log"
"github.com/canonical/k8s/pkg/snap"
Expand Down Expand Up @@ -92,13 +91,9 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
log.Error(err, "Failed to create k8s-dqlite client: %w")
}

log.Info("Cleaning up k8s-dqlite directory")
if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil {
return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err)
}
case "external":
log.Info("Cleaning up external datastore certificates")
if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil {
if err := setup.RemoveExtDatastorePKIFiles(snap); err != nil {
log.Error(err, "Failed to cleanup external datastore certificates")
}
default:
Expand All @@ -107,6 +102,10 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
log.Error(err, "Failed to retrieve cluster config")
}

log.Info("Cleaning up k8s-dqlite directory")
if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil {
return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err)
}
for _, dir := range []string{snap.ServiceArgumentsDir()} {
log.WithValues("directory", dir).Info("Cleaning up config files", dir)
if err := os.RemoveAll(dir); err != nil {
Expand All @@ -118,7 +117,7 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
// Trying to detect the node type is not reliable as the node might have been marked as worker
// or not, depending on which step it failed.
log.Info("Cleaning up worker certificates")
if _, err := setup.EnsureWorkerPKI(snap, &pki.WorkerNodePKI{}); err != nil {
if err := setup.RemoveWorkerPKIFiles(snap); err != nil {
log.Error(err, "failed to cleanup worker certificates")
}

Expand All @@ -130,7 +129,7 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
}

log.Info("Cleaning up control plane certificates")
if _, err := setup.EnsureControlPlanePKI(snap, &pki.ControlPlanePKI{}); err != nil {
if err := setup.RemoveControlNodePKIFiles(snap); err != nil {
log.Error(err, "failed to cleanup control plane certificates")
}

Expand All @@ -144,6 +143,11 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil {
log.Error(err, "Failed to stop control-plane services")
}

log.Info("Stopping k8s-dqlite")
if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil {
log.Error(err, "Failed to stop k8s-dqlite service")
}
}

tryCleanupContainerdPaths(log, snap)
Expand Down
51 changes: 51 additions & 0 deletions src/k8s/pkg/k8sd/setup/certificates.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ func ensureFiles(uid, gid int, mode fs.FileMode, files map[string]string) (bool,
return changed, nil
}

// removeFiles removes every path in files using os.Remove.
//
// Paths that do not exist are silently skipped. A failure to remove one path
// does not stop the cleanup: the remaining paths are still attempted, so that
// a single stuck file does not leave the rest (e.g. private keys) on disk.
//
// It returns nil if every path was removed or was already absent. Otherwise it
// returns an error wrapping the first removal failure; if more than one path
// failed, the message also states how many additional paths could not be
// removed.
func removeFiles(files []string) error {
	var firstErr error
	failures := 0

	for _, fname := range files {
		err := os.Remove(fname)
		if err == nil || os.IsNotExist(err) {
			continue
		}

		failures++
		if firstErr == nil {
			firstErr = fmt.Errorf("failed to remove %s: %w", fname, err)
		}
	}

	if failures > 1 {
		return fmt.Errorf("%w (and %d more files could not be removed)", firstErr, failures-1)
	}
	return firstErr
}

// EnsureExtDatastorePKI ensures the external datastore PKI files are present
// and have the correct content, permissions and ownership.
// It returns true if one or more files were updated and any error that occurred.
Expand All @@ -81,6 +90,16 @@ func EnsureExtDatastorePKI(snap snap.Snap, certificates *pki.ExternalDatastorePK
})
}

// RemoveExtDatastorePKIFiles deletes the external datastore PKI files
// (ca.crt, client.key and client.crt) from the snap's etcd PKI directory.
// Files that are already absent are ignored; an error is returned if a file
// could not be removed.
func RemoveExtDatastorePKIFiles(snap snap.Snap) error {
	pkiDir := snap.EtcdPKIDir()
	return removeFiles([]string{
		filepath.Join(pkiDir, "ca.crt"),
		filepath.Join(pkiDir, "client.key"),
		filepath.Join(pkiDir, "client.crt"),
	})
}

// EnsureK8sDqlitePKI ensures the k8s dqlite PKI files are present
// and have the correct content, permissions and ownership.
// It returns true if one or more files were updated and any error that occurred.
Expand Down Expand Up @@ -113,6 +132,27 @@ func EnsureControlPlanePKI(snap snap.Snap, certificates *pki.ControlPlanePKI) (b
})
}

// RemoveControlNodePKIFiles deletes the control plane certificates and keys
// from the snap's Kubernetes PKI directory. Files that are already absent are
// ignored; an error is returned if a file could not be removed.
func RemoveControlNodePKIFiles(snap snap.Snap) error {
	// File names are listed in removal order.
	names := []string{
		"apiserver-kubelet-client.crt",
		"apiserver-kubelet-client.key",
		"apiserver.crt",
		"apiserver.key",
		"ca.crt",
		"client-ca.crt",
		"ca.key",
		"front-proxy-ca.crt",
		"front-proxy-ca.key",
		"front-proxy-client.crt",
		"front-proxy-client.key",
		"kubelet.crt",
		"kubelet.key",
		"serviceaccount.key",
	}

	pkiDir := snap.KubernetesPKIDir()
	paths := make([]string, 0, len(names))
	for _, name := range names {
		paths = append(paths, filepath.Join(pkiDir, name))
	}
	return removeFiles(paths)
}

// EnsureWorkerPKI ensures the worker PKI files are present
// and have the correct content, permissions and ownership.
// It returns true if one or more files were updated and any error that occurred.
Expand All @@ -124,3 +164,14 @@ func EnsureWorkerPKI(snap snap.Snap, certificates *pki.WorkerNodePKI) (bool, err
filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"): certificates.KubeletKey,
})
}

// RemoveWorkerPKIFiles deletes the worker node PKI files (ca.crt,
// client-ca.crt, kubelet.crt and kubelet.key) from the snap's Kubernetes PKI
// directory. Files that are already absent are ignored; an error is returned
// if a file could not be removed.
func RemoveWorkerPKIFiles(snap snap.Snap) error {
	pkiDir := snap.KubernetesPKIDir()

	var paths []string
	for _, name := range []string{"ca.crt", "client-ca.crt", "kubelet.crt", "kubelet.key"} {
		paths = append(paths, filepath.Join(pkiDir, name))
	}
	return removeFiles(paths)
}

0 comments on commit 8b30ce4

Please sign in to comment.