Skip to content

Commit

Permalink
Retry setting VF MAC address
Browse files Browse the repository at this point in the history
Some NIC drivers (i.e. i40e/iavf) set their VF MAC addressing
asynchronously when set administratively. This means that while the PF
could already show the VF with the desired MAC address, the netdev VF
may still have the original one. If in this window we issue a netdev VF
MAC address set, the driver will return an error and the pod will fail
to create.

One way to fix this issue would be to not try to set the netdev VF MAC
address, rather simply rely on the MAC address set administratively
already in place. However, other NIC drivers (i.e. mlx5_core) do not
propagate the MAC address down to the netdev VF so for those drivers we
have to continue setting the VF MAC address the same way (via PF and
netdev VF).

This commit addresses the issue with a retry where it waits up to 1
second (5 retries * 200 millisecond sleep) in case driver is still
working on propagating the MAC address change down to the VF.

ResetVFConfig resets a VF administratively. We must run ResetVFConfig
before ReleaseVF because some drivers will error out if we try to reset
netdev VF with trust off. So, reset VF MAC address via PF first.

Signed-off-by: Carlos Goncalves <[email protected]>
  • Loading branch information
cgoncalves committed Nov 24, 2022
1 parent 7eb235b commit 3669d1f
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 6 deletions.
12 changes: 8 additions & 4 deletions cmd/sriov/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,14 @@ func cmdDel(args *skel.CmdArgs) error {

sm := sriov.NewSriovManager()

/* ResetVFConfig resets a VF administratively. We must run ResetVFConfig
before ReleaseVF because some drivers will error out if we try to
reset netdev VF with trust off. So, reset VF MAC address via PF first.
*/
if err := sm.ResetVFConfig(netConf); err != nil {
return fmt.Errorf("cmdDel() error reseting VF: %q", err)
}

if !netConf.DPDKMode {
netns, err := ns.GetNS(args.Netns)
if err != nil {
Expand All @@ -243,10 +251,6 @@ func cmdDel(args *skel.CmdArgs) error {
}
}

if err := sm.ResetVFConfig(netConf); err != nil {
return fmt.Errorf("cmdDel() error reseting VF: %q", err)
}

// Mark the pci address as released
allocator := utils.NewPCIAllocator(config.DefaultCNIDir)
if err = allocator.DeleteAllocatedPCI(netConf.DeviceID); err != nil {
Expand Down
31 changes: 29 additions & 2 deletions pkg/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sriov
import (
"fmt"
"net"
"time"

"github.com/containernetworking/plugins/pkg/ns"

Expand Down Expand Up @@ -91,7 +92,20 @@ func (s *sriovManager) SetupVF(conf *sriovtypes.NetConf, podifName string, cid s
// Save the original effective MAC address before overriding it
conf.OrigVfState.EffectiveMAC = linkObj.Attrs().HardwareAddr.String()

if err = s.nLink.LinkSetHardwareAddr(linkObj, hwaddr); err != nil {
/* Some NIC drivers (i.e. i40e/iavf) set VF MAC address asynchronously
via PF. This means that while the PF could already show the VF with
the desired MAC address, the netdev VF may still have the original
one. If in this window we issue a netdev VF MAC address set, the driver
will return an error and the pod will fail to create.
Other NICs (Mellanox) require explicit netdev VF MAC address so we
cannot skip this part.
Retry up to 5 times; wait 200 milliseconds between retries
*/
err = utils.Retry(5, 200*time.Millisecond, func() error {
return s.nLink.LinkSetHardwareAddr(linkObj, hwaddr)
})

if err != nil {
return "", fmt.Errorf("failed to set netlink MAC address to %s: %v", hwaddr, err)
}
macAddress = conf.MAC
Expand Down Expand Up @@ -332,7 +346,20 @@ func (s *sriovManager) ResetVFConfig(conf *sriovtypes.NetConf) error {
if err != nil {
return fmt.Errorf("failed to parse original administrative MAC address %s: %v", conf.OrigVfState.AdminMAC, err)
}
if err = s.nLink.LinkSetVfHardwareAddr(pfLink, conf.VFID, hwaddr); err != nil {

/* Some NIC drivers (i.e. i40e/iavf) set VF MAC address asynchronously
via PF. This means that while the PF could already show the VF with
the desired MAC address, the netdev VF may still have the original
one. If in this window we issue a netdev VF MAC address set, the driver
will return an error and the pod will fail to create.
Other NICs (Mellanox) require explicit netdev VF MAC address so we
cannot skip this part.
Retry up to 5 times; wait 200 milliseconds between retries
*/
err = utils.Retry(5, 200*time.Millisecond, func() error {
return s.nLink.LinkSetVfHardwareAddr(pfLink, conf.VFID, hwaddr)
})
if err != nil {
return fmt.Errorf("failed to restore original administrative MAC address %s: %v", hwaddr, err)
}
}
Expand Down
14 changes: 14 additions & 0 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
)

var (
Expand Down Expand Up @@ -314,3 +315,16 @@ func IsIPv4(ip net.IP) bool {
func IsIPv6(ip net.IP) bool {
return ip.To4() == nil && ip.To16() != nil
}

// Retry retries a given function until no return error; times out after retries*sleep
func Retry(retries int, sleep time.Duration, f func() error) error {
err := error(nil)
for retry := 0; retry < retries; retry++ {
err = f()
if err == nil {
return nil
}
time.Sleep(sleep)
}
return err
}
13 changes: 13 additions & 0 deletions pkg/utils/utils_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package utils

import (
"errors"
"time"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
Expand Down Expand Up @@ -78,4 +81,14 @@ var _ = Describe("Utils", func() {
Expect(err).To(HaveOccurred(), "Not existing VF should return an error")
})
})
Context("Checking Retry functon", func() {
It("Assuming calling function fails", func() {
err := Retry(5, 10*time.Millisecond, func() error { return errors.New("") })
Expect(err).To((HaveOccurred()), "Retry should return an error")
})
It("Assuming calling function does not fail", func() {
err := Retry(5, 10*time.Millisecond, func() error { return nil })
Expect(err).NotTo((HaveOccurred()), "Retry should not return an error")
})
})
})

0 comments on commit 3669d1f

Please sign in to comment.