Skip to content

Commit

Permalink
Cleans up files and removes k8s-dqlite on remove hook
Browse files Browse the repository at this point in the history
cfg.Datastore.GetType() may return an empty string if the bootstrap
action failed before database.SetClusterConfig has been called. Because
of this, we're not doing the proper clean up in the remove hook.

We're now opportunistically cleaning up the k8s-dqlite related
resources / external datastore certificates.

Additionally, in the remove hook, we're ensuring that the PKI files
exist, instead of removing them, contrary to what the log messages would
also suggest. This addresses this issue as well.
  • Loading branch information
claudiubelu committed Dec 16, 2024
1 parent 0cdeb87 commit a443e95
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 26 deletions.
48 changes: 22 additions & 26 deletions src/k8s/pkg/k8sd/app/hooks_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (

apiv1_annotations "github.com/canonical/k8s-snap-api/api/v1/annotations"
databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util"
"github.com/canonical/k8s/pkg/k8sd/pki"
"github.com/canonical/k8s/pkg/k8sd/setup"
"github.com/canonical/k8s/pkg/log"
"github.com/canonical/k8s/pkg/snap"
Expand Down Expand Up @@ -77,31 +76,28 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
}
}

switch cfg.Datastore.GetType() {
case "k8s-dqlite":
client, err := snap.K8sDqliteClient(ctx)
if err == nil {
log.Info("Removing node from k8s-dqlite cluster")
nodeAddress := net.JoinHostPort(s.Address().Hostname(), fmt.Sprintf("%d", cfg.Datastore.GetK8sDqlitePort()))
if err := client.RemoveNodeByAddress(ctx, nodeAddress); err != nil {
// Removing the node might fail (e.g. if it is the only one in the cluster).
// We still want to continue with the file cleanup, hence we only log the error.
log.Error(err, "Failed to remove node from k8s-dqlite cluster")
}
} else {
log.Error(err, "Failed to create k8s-dqlite client: %w")
// opportunistically cleanup for both k8s-dqlite and external datastore.
client, err := snap.K8sDqliteClient(ctx)
if err == nil {
log.Info("Removing node from k8s-dqlite cluster")
nodeAddress := net.JoinHostPort(s.Address().Hostname(), fmt.Sprintf("%d", cfg.Datastore.GetK8sDqlitePort()))
if err := client.RemoveNodeByAddress(ctx, nodeAddress); err != nil {
// Removing the node might fail (e.g. if it is the only one in the cluster).
// We still want to continue with the file cleanup, hence we only log the error.
log.Error(err, "Failed to remove node from k8s-dqlite cluster")
}
} else {
log.Error(err, "Failed to create k8s-dqlite client: %w")
}

log.Info("Cleaning up k8s-dqlite directory")
if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil {
return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err)
}
case "external":
log.Info("Cleaning up external datastore certificates")
if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil {
log.Error(err, "Failed to cleanup external datastore certificates")
}
default:
log.Info("Cleaning up k8s-dqlite directory")
if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil {
return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err)
}

log.Info("Cleaning up external datastore certificates")
if err := setup.RemoveExtDatastorePKIFiles(snap); err != nil {
log.Error(err, "Failed to cleanup external datastore certificates")
}
} else {
log.Error(err, "Failed to retrieve cluster config")
Expand All @@ -118,7 +114,7 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
// Trying to detect the node type is not reliable as the node might have been marked as worker
// or not, depending on which step it failed.
log.Info("Cleaning up worker certificates")
if _, err := setup.EnsureWorkerPKI(snap, &pki.WorkerNodePKI{}); err != nil {
if err := setup.RemoveWorkerPKIFiles(snap); err != nil {
log.Error(err, "failed to cleanup worker certificates")
}

Expand All @@ -130,7 +126,7 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr
}

log.Info("Cleaning up control plane certificates")
if _, err := setup.EnsureControlPlanePKI(snap, &pki.ControlPlanePKI{}); err != nil {
if err := setup.RemoveControlNodePKIFiles(snap); err != nil {
log.Error(err, "failed to cleanup control plane certificates")
}

Expand Down
51 changes: 51 additions & 0 deletions src/k8s/pkg/k8sd/setup/certificates.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ func ensureFiles(uid, gid int, mode fs.FileMode, files map[string]string) (bool,
return changed, nil
}

func removeFiles(files []string) error {
for fname := range files {
if err := os.Remove(fname); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to remove %s: %w", fname, err)
}
}
return nil
}

// EnsureExtDatastorePKI ensures the external datastore PKI files are present
// and have the correct content, permissions and ownership.
// It returns true if one or more files were updated and any error that occurred.
Expand All @@ -81,6 +90,16 @@ func EnsureExtDatastorePKI(snap snap.Snap, certificates *pki.ExternalDatastorePK
})
}

// RemoveExtDatastorePKIFiles removes the external datastore PKI files.
func RemoveExtDatastorePKIFiles(snap snap.Snap) error {
files := []string{
filepath.Join(snap.EtcdPKIDir(), "ca.crt"),
filepath.Join(snap.EtcdPKIDir(), "client.key"),
filepath.Join(snap.EtcdPKIDir(), "client.crt"),
}
return removeFiles(files)
}

// EnsureK8sDqlitePKI ensures the k8s dqlite PKI files are present
// and have the correct content, permissions and ownership.
// It returns true if one or more files were updated and any error that occurred.
Expand Down Expand Up @@ -113,6 +132,27 @@ func EnsureControlPlanePKI(snap snap.Snap, certificates *pki.ControlPlanePKI) (b
})
}

// RemoveControlNodePKIFiles removes the control plane PKI files.
func RemoveControlNodePKIFiles(snap snap.Snap) error {
files := []string{
filepath.Join(snap.KubernetesPKIDir(), "apiserver-kubelet-client.crt"),
filepath.Join(snap.KubernetesPKIDir(), "apiserver-kubelet-client.key"),
filepath.Join(snap.KubernetesPKIDir(), "apiserver.crt"),
filepath.Join(snap.KubernetesPKIDir(), "apiserver.key"),
filepath.Join(snap.KubernetesPKIDir(), "ca.crt"),
filepath.Join(snap.KubernetesPKIDir(), "client-ca.crt"),
filepath.Join(snap.KubernetesPKIDir(), "ca.key"),
filepath.Join(snap.KubernetesPKIDir(), "front-proxy-ca.crt"),
filepath.Join(snap.KubernetesPKIDir(), "front-proxy-ca.key"),
filepath.Join(snap.KubernetesPKIDir(), "front-proxy-client.crt"),
filepath.Join(snap.KubernetesPKIDir(), "front-proxy-client.key"),
filepath.Join(snap.KubernetesPKIDir(), "kubelet.crt"),
filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"),
filepath.Join(snap.KubernetesPKIDir(), "serviceaccount.key"),
}
return removeFiles(files)
}

// EnsureWorkerPKI ensures the worker PKI files are present
// and have the correct content, permissions and ownership.
// It returns true if one or more files were updated and any error that occurred.
Expand All @@ -124,3 +164,14 @@ func EnsureWorkerPKI(snap snap.Snap, certificates *pki.WorkerNodePKI) (bool, err
filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"): certificates.KubeletKey,
})
}

// RemoveWorkerPKIFiles removes the worker PKI files.
func RemoveWorkerPKIFiles(snap snap.Snap) error {
files := []string{
filepath.Join(snap.KubernetesPKIDir(), "ca.crt"),
filepath.Join(snap.KubernetesPKIDir(), "client-ca.crt"),
filepath.Join(snap.KubernetesPKIDir(), "kubelet.crt"),
filepath.Join(snap.KubernetesPKIDir(), "kubelet.key"),
}
return removeFiles(files)
}

0 comments on commit a443e95

Please sign in to comment.