Skip to content

Commit

Permalink
Snapshottable recovery system (#2032)
Browse files Browse the repository at this point in the history
* Snapshottable recovery system

Deploy the entire recovery system to the same folder (kernel, initrd and
rootfs).

During an upgrade, deploy to a transitional folder, then switch it with
the current recovery system, and finally delete the old one.

This makes sure we clean up old recovery systems and don't risk mixing
systems during upgrade.

Signed-off-by: Fredrik Lönnegren <[email protected]>
  • Loading branch information
frelon authored Apr 5, 2024
1 parent 88a9d0c commit 127fa52
Show file tree
Hide file tree
Showing 14 changed files with 128 additions and 94 deletions.
2 changes: 1 addition & 1 deletion cmd/build-iso.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func NewBuildISO(root *cobra.Command, addCheckRoot bool) *cobra.Command {
}

buildISO := action.NewBuildISOAction(cfg, spec)
return buildISO.ISORun()
return buildISO.Run()
},
}

Expand Down
9 changes: 1 addition & 8 deletions pkg/action/build-disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,16 +237,9 @@ func (b *BuildDiskAction) BuildDiskRun() (err error) { //nolint:gocyclo
return elementalError.NewFromError(err, elementalError.HookAfterDisk)
}

// Create recovery image
bootDir := filepath.Join(b.roots[constants.RecoveryPartName], "boot")
if err = utils.MkdirAll(b.cfg.Fs, bootDir, constants.DirPerm); err != nil {
b.cfg.Logger.Errorf("failed creating recovery boot dir: %v", err)
return err
}

tmpSrc := b.spec.RecoverySystem.Source
b.spec.RecoverySystem.Source = types.NewDirSrc(recRoot)
err = elemental.DeployRecoverySystem(b.cfg.Config, &b.spec.RecoverySystem, bootDir)
err = elemental.DeployRecoverySystem(b.cfg.Config, &b.spec.RecoverySystem)
if err != nil {
b.cfg.Logger.Errorf("failed deploying recovery system: %v", err)
return err
Expand Down
10 changes: 5 additions & 5 deletions pkg/action/build-iso.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func grubCfgTemplate(arch string) string {
menuentry "%s" --class os --unrestricted {
echo Loading kernel...
linux ($root)` + constants.ISOKernelPath(arch) + ` cdroot root=live:CDLABEL=%s rd.live.dir=/ rd.live.squashimg=rootfs.squashfs console=tty1 console=ttyS0 elemental.disable elemental.setup=` + constants.ISOCloudInitPath + `
linux ($root)` + constants.ISOKernelPath(arch) + ` cdroot root=live:CDLABEL=%s rd.live.dir=` + constants.ISOLoaderPath(arch) + ` rd.live.squashimg=rootfs.squashfs console=tty1 console=ttyS0 elemental.disable elemental.setup=` + constants.ISOCloudInitPath + `
echo Loading initrd...
initrd ($root)` + constants.ISOInitrdPath(arch) + `
}
Expand Down Expand Up @@ -78,8 +78,8 @@ func NewBuildISOAction(cfg *types.BuildConfig, spec *types.LiveISO, opts ...Buil
return b
}

// BuildISORun will install the system from a given configuration
func (b *BuildISOAction) ISORun() error {
// Run will install the system from a given configuration
func (b *BuildISOAction) Run() error {
cleanup := utils.NewCleanStack()
var err error
defer func() { err = cleanup.Cleanup(err) }()
Expand Down Expand Up @@ -170,11 +170,11 @@ func (b *BuildISOAction) ISORun() error {

image := &types.Image{
Source: types.NewDirSrc(rootDir),
File: filepath.Join(isoDir, constants.ISORootFile),
File: filepath.Join(bootDir, constants.ISORootFile),
FS: constants.SquashFs,
}

err = elemental.DeployRecoverySystem(b.cfg.Config, image, bootDir)
err = elemental.DeployRecoverySystem(b.cfg.Config, image)
if err != nil {
b.cfg.Logger.Errorf("Failed preparing ISO's root tree: %v", err)
return err
Expand Down
20 changes: 10 additions & 10 deletions pkg/action/build_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ var _ = Describe("Build Actions", func() {
}

buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
err := buildISO.ISORun()
err := buildISO.Run()

Expect(err).ShouldNot(HaveOccurred())
})
Expand All @@ -138,7 +138,7 @@ var _ = Describe("Build Actions", func() {
iso.RootFS = append(iso.RootFS, rootSrc)

buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
err := buildISO.ISORun()
err := buildISO.Run()
Expect(err).Should(HaveOccurred())
})
It("Fails on prepare ISO", func() {
Expand All @@ -148,7 +148,7 @@ var _ = Describe("Build Actions", func() {
iso.RootFS = append(iso.RootFS, rootSrc)

buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
err := buildISO.ISORun()
err := buildISO.Run()

Expect(err).Should(HaveOccurred())
})
Expand All @@ -161,14 +161,14 @@ var _ = Describe("Build Actions", func() {

By("fails without kernel")
buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
err = buildISO.ISORun()
err = buildISO.Run()
Expect(err).Should(HaveOccurred())

By("fails without initrd")
_, err = fs.Create("/local/dir/boot/vmlinuz")
Expect(err).ShouldNot(HaveOccurred())
buildISO = action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
err = buildISO.ISORun()
err = buildISO.Run()
Expect(err).Should(HaveOccurred())
})
It("Fails installing uefi sources", func() {
Expand All @@ -178,7 +178,7 @@ var _ = Describe("Build Actions", func() {
iso.UEFI = []*types.ImageSource{uefiSrc}

buildISO := action.NewBuildISOAction(cfg, iso)
err := buildISO.ISORun()
err := buildISO.Run()
Expect(err).Should(HaveOccurred())
})
It("Fails on ISO filesystem creation", func() {
Expand All @@ -193,7 +193,7 @@ var _ = Describe("Build Actions", func() {
}

buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
err := buildISO.ISORun()
err := buildISO.Run()

Expect(err).Should(HaveOccurred())
})
Expand Down Expand Up @@ -228,7 +228,7 @@ var _ = Describe("Build Actions", func() {
Expect(buildDisk.BuildDiskRun()).To(Succeed())

Expect(runner.MatchMilestones([][]string{
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/recovery.img"},
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/boot/recovery.img"},
{"mkfs.ext4", "-L", "COS_STATE"},
{"losetup", "--show", "-f", "/tmp/test/build/state.part"},
{"mkfs.vfat", "-n", "COS_GRUB"},
Expand All @@ -255,7 +255,7 @@ var _ = Describe("Build Actions", func() {
Expect(buildDisk.BuildDiskRun()).To(Succeed())

Expect(runner.MatchMilestones([][]string{
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/recovery.img"},
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/boot/recovery.img"},
{"mkfs.vfat", "-n", "COS_GRUB"},
{"mkfs.ext4", "-L", "COS_OEM"},
{"mkfs.ext4", "-L", "COS_RECOVERY"},
Expand All @@ -274,7 +274,7 @@ var _ = Describe("Build Actions", func() {
Expect(buildDisk.BuildDiskRun()).NotTo(Succeed())

Expect(runner.MatchMilestones([][]string{
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/recovery.img"},
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/boot/recovery.img"},
})).To(Succeed())

// failed before preparing partitions images
Expand Down
2 changes: 1 addition & 1 deletion pkg/action/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func (i InstallAction) Run() (err error) {
}
recoverySystem.Source.SetDigest(i.spec.System.GetDigest())
}
err = elemental.DeployRecoverySystem(i.cfg.Config, &recoverySystem, recoveryBootDir)
err = elemental.DeployRecoverySystem(i.cfg.Config, &recoverySystem)
if err != nil {
i.cfg.Logger.Errorf("Failed deploying recovery image: %v", err)
return elementalError.NewFromError(err, elementalError.DeployImage)
Expand Down
75 changes: 55 additions & 20 deletions pkg/action/upgrade-recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,22 @@ func NewUpgradeRecoveryAction(config *types.RunConfig, spec *types.UpgradeSpec,
return u, nil
}

func (u UpgradeRecoveryAction) Info(s string, args ...interface{}) {
func (u UpgradeRecoveryAction) Infof(s string, args ...interface{}) {
u.cfg.Logger.Infof(s, args...)
}

func (u UpgradeRecoveryAction) Debug(s string, args ...interface{}) {
func (u UpgradeRecoveryAction) Debugf(s string, args ...interface{}) {
u.cfg.Logger.Debugf(s, args...)
}

func (u UpgradeRecoveryAction) Error(s string, args ...interface{}) {
func (u UpgradeRecoveryAction) Errorf(s string, args ...interface{}) {
u.cfg.Logger.Errorf(s, args...)
}

// Warnf forwards the format string s and its args to the Warnf method of
// the logger held in the action's configuration (u.cfg.Logger).
func (u UpgradeRecoveryAction) Warnf(s string, args ...interface{}) {
u.cfg.Logger.Warnf(s, args...)
}

func (u *UpgradeRecoveryAction) mountRWPartitions(cleanup *utils.CleanStack) error {
umount, err := elemental.MountRWPartition(u.cfg.Config, u.spec.Partitions.Recovery)
if err != nil {
Expand Down Expand Up @@ -146,48 +150,79 @@ func (u *UpgradeRecoveryAction) Run() (err error) {
return err
}

// Create recovery /boot dir if not exists
bootDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, "boot")
if err := utils.MkdirAll(u.cfg.Fs, bootDir, constants.DirPerm); err != nil {
u.cfg.Logger.Errorf("failed creating recovery boot dir: %v", err)
return elementalError.NewFromError(err, elementalError.CreateDir)
// Remove any traces of previously errored upgrades
transitionDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.BootTransitionDir)
u.Debugf("removing any orphaned recovery system %s", transitionDir)
err = utils.RemoveAll(u.cfg.Fs, transitionDir)
if err != nil {
u.Errorf("failed removing orphaned recovery image: %s", err.Error())
return err
}

// Upgrade recovery
err = elemental.DeployRecoverySystem(u.cfg.Config, &u.spec.RecoverySystem, bootDir)
// Deploy recovery system to transition dir
err = elemental.DeployRecoverySystem(u.cfg.Config, &u.spec.RecoverySystem)
if err != nil {
u.cfg.Logger.Errorf("failed deploying recovery image: %v", err)
u.cfg.Logger.Errorf("failed deploying recovery image: %s", err.Error())
return elementalError.NewFromError(err, elementalError.DeployImage)
}
recoveryFile := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.RecoveryImgFile)
transitionFile := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.TransitionImgFile)
if ok, _ := utils.Exists(u.cfg.Fs, recoveryFile); ok {
err = u.cfg.Fs.Remove(recoveryFile)

// Switch places on /boot and transition-dir
bootDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.BootDir)
oldBootDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.OldBootDir)

// If a previous upgrade failed, remove old boot-dir
err = utils.RemoveAll(u.cfg.Fs, oldBootDir)
if err != nil {
u.Errorf("failed removing orphaned recovery image: %s", err.Error())
return err
}

// Rename current boot-dir in case we need to use it again
if ok, _ := utils.Exists(u.cfg.Fs, bootDir); ok {
err = u.cfg.Fs.Rename(bootDir, oldBootDir)
if err != nil {
u.Error("failed removing old recovery image")
u.Errorf("failed removing old recovery image: %s", err.Error())
return err
}
}
err = u.cfg.Fs.Rename(transitionFile, recoveryFile)

// Move new boot-dir to /boot
err = u.cfg.Fs.Rename(transitionDir, bootDir)
if err != nil {
u.Error("failed renaming transition recovery image")
u.cfg.Logger.Errorf("failed renaming transition recovery image: %s", err.Error())

// Try to salvage old recovery system
if ok, _ := utils.Exists(u.cfg.Fs, oldBootDir); ok {
err = u.cfg.Fs.Rename(oldBootDir, bootDir)
if err != nil {
u.cfg.Logger.Errorf("failed salvaging old recovery system: %s", err.Error())
}
}

return err
}

// Remove old boot-dir when new recovery system is in place
err = utils.RemoveAll(u.cfg.Fs, oldBootDir)
if err != nil {
u.Warnf("failed removing old recovery image: %s", err.Error())
}

// Update state.yaml file on recovery and state partitions
if u.updateInstallState {
err = u.upgradeInstallStateYaml()
if err != nil {
u.Error("failed upgrading installation metadata")
u.Errorf("failed upgrading installation metadata: %s", err.Error())
return err
}
}

u.Info("Recovery upgrade completed")
u.Infof("Recovery upgrade completed")

// Do not reboot/poweroff on cleanup errors
err = cleanup.Cleanup(err)
if err != nil {
u.Errorf("failed cleanup: %s", err.Error())
return elementalError.NewFromError(err, elementalError.Cleanup)
}

Expand Down
29 changes: 15 additions & 14 deletions pkg/action/upgrade-recovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ var _ = Describe("Upgrade Recovery Actions", func() {
Expect(err).To(HaveOccurred())
})
It("Successfully upgrades recovery from docker image", Label("docker"), func() {
recoveryImgPath := filepath.Join(constants.LiveDir, constants.RecoveryImgFile)
recoveryImgPath := filepath.Join(constants.LiveDir, constants.BootDir, constants.RecoveryImgFile)
spec := PrepareTestRecoveryImage(config, constants.LiveDir, fs, runner)

// This should be the old image
Expand Down Expand Up @@ -212,7 +212,7 @@ var _ = Describe("Upgrade Recovery Actions", func() {
Expect(spec.State.Date).ToNot(BeEmpty(), "post-upgrade state should contain a date")
})
It("Successfully skips updateInstallState", Label("docker"), func() {
recoveryImgPath := filepath.Join(constants.LiveDir, constants.RecoveryImgFile)
recoveryImgPath := filepath.Join(constants.LiveDir, constants.BootDir, constants.RecoveryImgFile)
spec := PrepareTestRecoveryImage(config, constants.LiveDir, fs, runner)

// This should be the old image
Expand Down Expand Up @@ -253,7 +253,6 @@ var _ = Describe("Upgrade Recovery Actions", func() {
})

func PrepareTestRecoveryImage(config *types.RunConfig, recoveryPath string, fs vfs.FS, runner *mocks.FakeRunner) *types.UpgradeSpec {
GinkgoHelper()
// Create installState with squashed recovery
statePath := filepath.Join(constants.RunningStateDir, constants.InstallStateFile)
installState := &types.InstallState{
Expand All @@ -270,28 +269,30 @@ func PrepareTestRecoveryImage(config *types.RunConfig, recoveryPath string, fs v
}
Expect(config.WriteInstallState(installState, statePath, statePath)).ShouldNot(HaveOccurred())

recoveryImgPath := filepath.Join(recoveryPath, constants.RecoveryImgFile)
Expect(fs.WriteFile(recoveryImgPath, []byte("recovery"), constants.FilePerm)).ShouldNot(HaveOccurred())

transitionDir := filepath.Join(recoveryPath, "transition.imgTree")
Expect(utils.MkdirAll(fs, filepath.Join(transitionDir, "lib/modules/6.6"), constants.DirPerm)).ShouldNot(HaveOccurred())
bootDir := filepath.Join(transitionDir, "boot")
Expect(utils.MkdirAll(fs, bootDir, constants.DirPerm)).ShouldNot(HaveOccurred())
Expect(fs.WriteFile(filepath.Join(bootDir, "vmlinuz-6.6"), []byte("kernel"), constants.FilePerm)).ShouldNot(HaveOccurred())
Expect(fs.WriteFile(filepath.Join(bootDir, "elemental.initrd-6.6"), []byte("initrd"), constants.FilePerm)).ShouldNot(HaveOccurred())
for _, rootDir := range []string{"/some/dir", recoveryPath} {
bootDir := filepath.Join(rootDir, "boot")
Expect(utils.MkdirAll(fs, bootDir, constants.DirPerm)).ShouldNot(HaveOccurred())
recoveryImgPath := filepath.Join(bootDir, constants.RecoveryImgFile)
Expect(fs.WriteFile(recoveryImgPath, []byte("recovery"), constants.FilePerm)).ShouldNot(HaveOccurred())
Expect(utils.MkdirAll(fs, filepath.Join(rootDir, "lib/modules/6.6"), constants.DirPerm)).ShouldNot(HaveOccurred())
Expect(utils.MkdirAll(fs, bootDir, constants.DirPerm)).ShouldNot(HaveOccurred())
Expect(fs.WriteFile(filepath.Join(bootDir, "vmlinuz-6.6"), []byte("kernel"), constants.FilePerm)).ShouldNot(HaveOccurred())
Expect(fs.WriteFile(filepath.Join(bootDir, "elemental.initrd-6.6"), []byte("initrd"), constants.FilePerm)).ShouldNot(HaveOccurred())
}

spec, err := conf.NewUpgradeSpec(config.Config)
Expect(err).ShouldNot(HaveOccurred())

spec.System = types.NewDockerSrc("alpine")
spec.RecoveryUpgrade = true
spec.RecoverySystem.Source = spec.System
spec.RecoverySystem.Source = types.NewDirSrc("/some/dir")
spec.RecoverySystem.Size = 16

runner.SideEffect = func(command string, args ...string) ([]byte, error) {
if command == "mksquashfs" && args[1] == spec.RecoverySystem.File {
// create the transition img for squash to fake it
_, _ = fs.Create(spec.RecoverySystem.File)
_, err = fs.Create(spec.RecoverySystem.File)
Expect(err).To(Succeed())
}
return []byte{}, nil
}
Expand Down
8 changes: 2 additions & 6 deletions pkg/action/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,14 +294,12 @@ var _ = Describe("Runtime Actions", func() {
Expect(runner.IncludesCmds([][]string{{"poweroff", "-f"}})).To(BeNil())
})
It("Successfully upgrades recovery from docker image", Label("docker"), func() {
recoveryImgPath := filepath.Join(constants.LiveDir, constants.RecoveryImgFile)
recoveryImgPath := filepath.Join(constants.LiveDir, constants.BootDir, constants.RecoveryImgFile)
spec := PrepareTestRecoveryImage(config, constants.LiveDir, fs, runner)

// This should be the old image
info, err := fs.Stat(recoveryImgPath)
Expect(err).ToNot(HaveOccurred())
// Image size should be empty
Expect(info.Size()).To(BeNumerically(">", 0))
Expect(info.IsDir()).To(BeFalse())
f, _ := fs.ReadFile(recoveryImgPath)
Expect(f).To(ContainSubstring("recovery"))
Expand All @@ -314,11 +312,9 @@ var _ = Describe("Runtime Actions", func() {
// This should be the new image
info, err = fs.Stat(recoveryImgPath)
Expect(err).ToNot(HaveOccurred())
// Image size should be empty
Expect(info.Size()).To(BeNumerically("==", 0))
Expect(info.IsDir()).To(BeFalse())
f, _ = fs.ReadFile(recoveryImgPath)
Expect(f).ToNot(ContainSubstring("recovery"))
Expect(f).To(BeEmpty())

// Transition squash should not exist
info, err = fs.Stat(spec.RecoverySystem.File)
Expand Down
Loading

0 comments on commit 127fa52

Please sign in to comment.