diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 4dda2801fb..628a706933 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -588,6 +588,23 @@ func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error if err := sbox.AddInterface(vxlanName, "vxlan", sbox.InterfaceOptions().Master(brName)); err != nil { + // If adding vxlan device to the overlay namespace fails, remove the bridge interface we + // already added to the namespace. This allows the caller to try the setup again. + for _, iface := range sbox.Info().Interfaces() { + if iface.SrcName() == brName { + if ierr := iface.Remove(); ierr != nil { + logrus.Errorf("removing bridge failed from ov ns %v failed, %v", n.sbox.Key(), ierr) + } + } + } + + // Also, delete the vxlan interface. Since a global vni id is associated + // with the vxlan interface, an orphaned vxlan interface will result in + // failure of vxlan device creation if the vni is assigned to some other + // network. + if deleteErr := deleteInterface(vxlanName); deleteErr != nil { + logrus.Warnf("could not delete vxlan interface, %s, error %v, after config error, %v", vxlanName, deleteErr, err) + } return fmt.Errorf("vxlan interface creation failed for subnet %q: %v", s.subnetIP.String(), err) } @@ -629,6 +646,9 @@ func (n *network) initSubnetSandbox(s *subnet, restore bool) error { } } else { if err := n.setupSubnetSandbox(s, brName, vxlanName); err != nil { + // The error in setupSubnetSandbox could be a temporary glitch. reset the + // subnet once object to allow the setup to be retried on another endpoint join. 
+ s.once = &sync.Once{} return err } } diff --git a/osl/interface_linux.go b/osl/interface_linux.go index 0ecda09f6e..a924af4bdf 100644 --- a/osl/interface_linux.go +++ b/osl/interface_linux.go @@ -289,6 +289,16 @@ func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...If // Configure the interface now this is moved in the proper namespace. if err := configureInterface(nlh, iface, i); err != nil { + // If configuring the device fails, move it back to the host namespace + // and change the name back to the source name. This allows the caller + // to properly clean up the interface. It's especially important for + // interfaces with global attributes, e.g. vni id for vxlan interfaces. + if nerr := nlh.LinkSetName(iface, i.SrcName()); nerr != nil { + logrus.Errorf("renaming interface (%s->%s) failed, %v after config error %v", i.DstName(), i.SrcName(), nerr, err) + } + if nerr := nlh.LinkSetNsFd(iface, ns.ParseHandlerInt()); nerr != nil { + logrus.Errorf("moving inteface %s to host ns failed, %v, after config error %v", i.SrcName(), nerr, err) + } return err }