Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VM live migrate #3767

Merged
merged 12 commits into from
Mar 5, 2024
Prev Previous commit
Next Next commit
fix migrated pod force deleted
Signed-off-by: bobz965 <zhangbingbing2_yewu@cmss.chinamobile.com>
zbb88888 committed Mar 5, 2024
commit de8469d9b4feb955e6a676423e035712ea02ff09
44 changes: 36 additions & 8 deletions mocks/pkg/ovs/interface.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 23 additions & 35 deletions pkg/controller/pod.go
Original file line number Diff line number Diff line change
@@ -641,11 +641,11 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
// todo: isVmPod, getPodType, getNameByPod has duplicated logic

var err error
var isMigrate, migrated, fail bool
var isMigrate, migrated, migratedFail bool
var vmKey, srcNodeName, targetNodeName string
if isVMPod && c.config.EnableKeepVMIP {
vmKey = fmt.Sprintf("%s/%s", namespace, vmName)
if isMigrate, migrated, fail, srcNodeName, targetNodeName, err = c.migrateVM(pod, vmKey); err != nil {
if isMigrate, migrated, migratedFail, srcNodeName, targetNodeName, err = c.migrateVM(pod, vmKey); err != nil {
klog.Error(err)
return nil, err
}
@@ -729,22 +729,32 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
DHCPv4OptionsUUID: subnet.Status.DHCPv4OptionsUUID,
DHCPv6OptionsUUID: subnet.Status.DHCPv6OptionsUUID,
}

if isVMPod && !isMigrate {
if err := c.OVNNbClient.CleanLogicalSwitchPortMigrateOptions(portName); err != nil {
err = fmt.Errorf("failed to clean migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
}
if err := c.OVNNbClient.CreateLogicalSwitchPort(subnet.Name, portName, ipStr, mac, podName, pod.Namespace,
portSecurity, securityGroupAnnotation, vips, podNet.Subnet.Spec.EnableDHCP, dhcpOptions, subnet.Spec.Vpc); err != nil {
c.recorder.Eventf(pod, v1.EventTypeWarning, "CreateOVNPortFailed", err.Error())
klog.Errorf("%v", err)
return nil, err
}

if vmKey != "" && isMigrate {
if isMigrate {
if migrated {
if err = c.cleanVMLSPMigrationOptions(portName, fail); err != nil {
klog.Infof("migrate end reset options for lsp %s from %s to %s, migrated fail: %t", portName, srcNodeName, targetNodeName, migratedFail)
if err := c.OVNNbClient.ResetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName, migratedFail); err != nil {
err = fmt.Errorf("failed to clean migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
} else {
if err = c.setVMLSPMigrationOptions(portName, srcNodeName, targetNodeName); err != nil {
klog.Infof("migrate start set options for lsp %s from %s to %s", portName, srcNodeName, targetNodeName)
if err := c.OVNNbClient.SetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName); err != nil {
err = fmt.Errorf("failed to set migrate options for lsp %s, %v", portName, err)
klog.Error(err)
return nil, err
}
@@ -1379,7 +1389,7 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {

migrate := false
if job, ok := pod.Annotations[util.MigrationJobAnnotation]; ok {
klog.Infof("migrate job %s for pod %s/%s", job, pod.Namespace, pod.Name)
klog.Infof("pod %s/%s is in the migration job %s", pod.Namespace, pod.Name, job)
migrate = true
}

@@ -2130,13 +2140,12 @@ func (c *Controller) migrateVM(pod *v1.Pod, vmKey string) (bool, bool, bool, str
// migrate true means need ovn set migrate options
// migrated ok means need set migrate options to target node
// migrated failed means need set migrate options to source node
if _, ok := pod.Annotations[util.MigrationJobAnnotation]; !ok {
return false, false, false, "", "", nil
}
if _, ok := pod.Annotations[util.MigrationSourceAnnotation]; ok {
if pod.Spec.NodeName == "" {
err := fmt.Errorf("source vm %s running pod %s should have node name", vmKey, pod.Name)
klog.Warning(err)
return false, false, false, "", "", nil
}
klog.Infof("prepare to migrate vm %s pod %s out from source node %s", vmKey, pod.Name, pod.Spec.NodeName)
klog.Infof("will migrate out vm %s pod %s from source node %s", vmKey, pod.Name, pod.Spec.NodeName)
// there may be another state here that can mark the end of the migration; the source pod has been updated once
return false, false, false, "", "", nil
}
// ovn set migrator only in the process of target vm pod
@@ -2172,8 +2181,7 @@ func (c *Controller) migrateVM(pod *v1.Pod, vmKey string) (bool, bool, bool, str
return true, false, false, srcNode, targetNode, nil
}
if migratePhase == util.MigrationStateSucceeded {
klog.Infof("manage to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)

klog.Infof("succeed to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
return true, true, false, srcNode, targetNode, nil
}
if migratePhase == util.MigrationStateFailed {
@@ -2183,23 +2191,3 @@ func (c *Controller) migrateVM(pod *v1.Pod, vmKey string) (bool, bool, bool, str

return false, false, false, "", "", nil
}

// setVMLSPMigrationOptions logs the request and then asks the OVN NB client to
// set the migrate options on the logical switch port portName, passing the
// source and target node names through unchanged.
//
// It returns nil when the client call succeeds; otherwise the client error is
// wrapped with the port name, logged, and returned.
func (c *Controller) setVMLSPMigrationOptions(portName, srcNodeName, targetNodeName string) error {
	klog.Infof("set migrate options for lsp %s", portName)

	setErr := c.OVNNbClient.SetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName)
	if setErr == nil {
		return nil
	}

	wrapped := fmt.Errorf("failed to set migrate options for lsp %s, %v", portName, setErr)
	klog.Error(wrapped)
	return wrapped
}

// cleanVMLSPMigrationOptions logs the request and then asks the OVN NB client
// to clean the migrate options on the logical switch port portName, forwarding
// the fail flag as-is.
//
// It returns nil when the client call succeeds; otherwise the client error is
// wrapped with the port name, logged, and returned.
func (c *Controller) cleanVMLSPMigrationOptions(portName string, fail bool) error {
	klog.Infof("clean migrate options for lsp %s", portName)

	cleanErr := c.OVNNbClient.CleanLogicalSwitchPortMigrateOptions(portName, fail)
	if cleanErr == nil {
		return nil
	}

	wrapped := fmt.Errorf("failed to clean migrate options for lsp %s, %v", portName, cleanErr)
	klog.Error(wrapped)
	return wrapped
}
3 changes: 2 additions & 1 deletion pkg/ovs/interface.go
Original file line number Diff line number Diff line change
@@ -93,7 +93,8 @@ type LogicalSwitchPort interface {
LogicalSwitchPortExists(name string) (bool, error)
// vm live migrate
SetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string) error
CleanLogicalSwitchPortMigrateOptions(lspName string, fail bool) error
ResetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string, migratedFail bool) error
CleanLogicalSwitchPortMigrateOptions(lspName string) error
}

type LoadBalancer interface {
93 changes: 69 additions & 24 deletions pkg/ovs/ovn-nb-logical_switch_port.go
Original file line number Diff line number Diff line change
@@ -551,7 +551,9 @@ func (c *OVNNbClient) SetLogicalSwitchPortVlanTag(lspName string, vlanID int) er
// UpdateLogicalSwitchPort update logical switch port
func (c *OVNNbClient) UpdateLogicalSwitchPort(lsp *ovnnb.LogicalSwitchPort, fields ...interface{}) error {
if lsp == nil {
return fmt.Errorf("logical_switch_port is nil")
err := fmt.Errorf("logical switch port is nil")
klog.Error(err)
return err
}

op, err := c.Where(lsp).Update(lsp, fields...)
@@ -561,6 +563,7 @@ func (c *OVNNbClient) UpdateLogicalSwitchPort(lsp *ovnnb.LogicalSwitchPort, fiel
}

if err = c.Transact("lsp-update", op); err != nil {
klog.Error(err)
return fmt.Errorf("update logical switch port %s: %v", lsp.Name, err)
}

@@ -820,20 +823,26 @@ func (c *OVNNbClient) SetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, t
klog.Error(err)
return err
}
if lsp == nil {
err := fmt.Errorf("no migrator logical switch port %s", lspName)
klog.Error(err)
return err
}

if src == srcNodeName && target == targetNodeName {
// already set
return nil
}

requestedChassis := fmt.Sprintf("%s,%s", srcNodeName, targetNodeName)
klog.Infof("set logical switch port %s options requested-chassis=%s", lspName, requestedChassis)
if lsp.Options == nil {
lsp.Options = make(map[string]string)
}
lsp.Options["requested-chassis"] = requestedChassis
lsp.Options["activation-strategy"] = "rarp"
klog.Infof("set migrator logical switch port %s options: %v", lspName, lsp.Options)
if err := c.UpdateLogicalSwitchPort(lsp, &lsp.Options); err != nil {
err = fmt.Errorf("failed to set logical switch port %s options requested chassis %s: %v", lspName, requestedChassis, err)
err = fmt.Errorf("failed to set migrator logical switch port %s options requested chassis %s: %v", lspName, requestedChassis, err)
klog.Error(err)
return err
}
@@ -842,14 +851,14 @@ func (c *OVNNbClient) SetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, t

// GetLogicalSwitchPortMigrateOptions get logical switch port src and target node name options of migrate
func (c *OVNNbClient) GetLogicalSwitchPortMigrateOptions(lspName string) (*ovnnb.LogicalSwitchPort, string, string, error) {
lsp, err := c.GetLogicalSwitchPort(lspName, true)
lsp, err := c.GetLogicalSwitchPort(lspName, false)
if err != nil {
err = fmt.Errorf("failed to get logical switch port %s: %v", lspName, err)
err = fmt.Errorf("failed to get migrator logical switch port %s: %v", lspName, err)
klog.Error(err)
return nil, "", "", err
}
if lsp == nil || lsp.Options == nil {
return nil, "", "", nil
if lsp.Options == nil {
return lsp, "", "", nil
}

requestedChassis, ok := lsp.Options["requested-chassis"]
@@ -862,28 +871,64 @@ func (c *OVNNbClient) GetLogicalSwitchPortMigrateOptions(lspName string) (*ovnnb
return nil, "", "", nil
}

// ResetLogicalSwitchPortMigrateOptions rewrites the migration-related options
// of the logical switch port lspName once a migration has ended.
//
// The port is looked up first; a lookup error is wrapped, logged and returned.
// If the port has no options at all, or no "requested-chassis" option, there is
// nothing to reset and nil is returned. Otherwise "requested-chassis" is set to
// srcNodeName when migratedFail is true (rollback) or to targetNodeName when it
// is false, the "activation-strategy" option is removed, and the port's options
// are written back through UpdateLogicalSwitchPort.
func (c *OVNNbClient) ResetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string, migratedFail bool) error {
	port, getErr := c.GetLogicalSwitchPort(lspName, false)
	if getErr != nil {
		wrapped := fmt.Errorf("failed to get migrator logical switch port %s: %v", lspName, getErr)
		klog.Error(wrapped)
		return wrapped
	}

	// Nothing to do unless the port still carries migrator options.
	if port.Options == nil {
		klog.Infof("logical switch port %s has no options", lspName)
		return nil
	}
	if _, hasChassis := port.Options["requested-chassis"]; !hasChassis {
		klog.Infof("logical switch port %s has no migrator options", lspName)
		return nil
	}

	// Pin the port to exactly one chassis: the source on failure (rollback),
	// the target on success.
	if !migratedFail {
		klog.Infof("reset migrator port %s to target node %s", lspName, targetNodeName)
		port.Options["requested-chassis"] = targetNodeName
	} else {
		klog.Infof("reset migrator port %s to source node %s", lspName, srcNodeName)
		port.Options["requested-chassis"] = srcNodeName
	}
	delete(port.Options, "activation-strategy")
	klog.Infof("reset migrator logical switch port %s options: %v", lspName, port.Options)

	updateErr := c.UpdateLogicalSwitchPort(port, &port.Options)
	if updateErr == nil {
		return nil
	}

	wrapped := fmt.Errorf("failed to reset options for migrator logical switch port %s: %v", lspName, updateErr)
	klog.Error(wrapped)
	return wrapped
}

// CleanLogicalSwitchPortMigrateOptions clean logical switch port options of migration
func (c *OVNNbClient) CleanLogicalSwitchPortMigrateOptions(lspName string, fail bool) error {
lsp, src, target, err := c.GetLogicalSwitchPortMigrateOptions(lspName)
func (c *OVNNbClient) CleanLogicalSwitchPortMigrateOptions(lspName string) error {
lsp, err := c.GetLogicalSwitchPort(lspName, true)
if err != nil {
err = fmt.Errorf("failed to get migrator logical switch port %s: %v", lspName, err)
klog.Error(err)
return err
}
if src != "" && target != "" {
if fail {
// rollback
klog.Infof("rollback fail migrator port %s", lspName)
lsp.Options["requested-chassis"] = src
} else {
klog.Infof("set target migrator port %s", lspName)
lsp.Options["requested-chassis"] = target
}
delete(lsp.Options, "activation-strategy")
if err := c.UpdateLogicalSwitchPort(lsp, &lsp.Options); err != nil {
err = fmt.Errorf("failed to clean options for logical switch port %s : %v", lspName, err)
klog.Error(err)
return err
}
if lsp == nil {
return nil
}
if lsp.Options == nil {
return nil
}
if _, ok := lsp.Options["requested-chassis"]; !ok {
return nil
}
// migrated pod port has requested-chassis, if pod force deleted, the vm pod may schedule to another node
// if not clean the requested-chassis, the pod has no network connectivity
delete(lsp.Options, "requested-chassis")
klog.Infof("cleaned migrator logical switch port %s options: %v", lspName, lsp.Options)
if err := c.UpdateLogicalSwitchPort(lsp, &lsp.Options); err != nil {
err = fmt.Errorf("failed to clean options for migrator logical switch port %s: %v", lspName, err)
klog.Error(err)
return err
}
return nil
}