Skip to content

Commit f7574c1

Browse files
zbb88888oilbeater
andcommitted
VM live migrate (#3767)
optimize vm live migrate --------- Signed-off-by: bobz965 <zhangbingbing2_yewu@cmss.chinamobile.com> Co-authored-by: Oilbeater <liumengxinfly@gmail.com>
1 parent 39621e7 commit f7574c1

File tree

7 files changed

+349
-18
lines changed

7 files changed

+349
-18
lines changed

mocks/pkg/ovs/interface.go

+84
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/controller/node.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -585,9 +585,7 @@ func (c *Controller) updateProviderNetworkForNodeDeletion(pn *kubeovnv1.Provider
585585
}
586586
}
587587
if changed {
588-
if newPn == nil {
589-
newPn = pn.DeepCopy()
590-
}
588+
newPn = pn.DeepCopy()
591589
newPn.Spec.CustomInterfaces = customInterfaces
592590
}
593591
if newPn != nil {

pkg/controller/pod.go

+113-6
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,6 @@ func (c *Controller) handleAddOrUpdatePod(key string) (err error) {
611611
return nil
612612
}
613613
pod = cachedPod.DeepCopy()
614-
// check if allocate subnet is need. also allocate subnet when hotplug nic
615614
needAllocatePodNets := needAllocateSubnets(pod, podNets)
616615
if len(needAllocatePodNets) != 0 {
617616
if cachedPod, err = c.reconcileAllocateSubnets(cachedPod, pod, needAllocatePodNets); err != nil {
@@ -641,6 +640,16 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
641640
podName := c.getNameByPod(pod)
642641
// todo: isVmPod, getPodType, getNameByPod has duplicated logic
643642

643+
var err error
644+
var isMigrate, migrated, migratedFail bool
645+
var vmKey, srcNodeName, targetNodeName string
646+
if isVMPod && c.config.EnableKeepVMIP {
647+
vmKey = fmt.Sprintf("%s/%s", namespace, vmName)
648+
if isMigrate, migrated, migratedFail, srcNodeName, targetNodeName, err = c.migrateVM(pod, vmKey); err != nil {
649+
klog.Error(err)
650+
return nil, err
651+
}
652+
}
644653
// Avoid create lsp for already running pod in ovn-nb when controller restart
645654
for _, podNet := range needAllocatePodNets {
646655
// the subnet may changed when alloc static ip from the latter subnet after ns supports multi subnets
@@ -669,10 +678,11 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
669678
delete(pod.Annotations, fmt.Sprintf(util.PodNicAnnotationTemplate, podNet.ProviderName))
670679
}
671680
pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, podNet.ProviderName)] = "true"
672-
if isVMPod && c.config.EnableKeepVMIP {
681+
682+
if vmKey != "" {
673683
pod.Annotations[fmt.Sprintf(util.VMTemplate, podNet.ProviderName)] = vmName
674684
if err := c.changeVMSubnet(vmName, namespace, podNet.ProviderName, subnet.Name); err != nil {
675-
klog.Errorf("change subnet of pod %s/%s to %s failed: %v", namespace, name, subnet.Name, err)
685+
klog.Errorf("vm %s change subnet to %s failed: %v", vmKey, subnet.Name, err)
676686
return nil, err
677687
}
678688
}
@@ -719,13 +729,31 @@ func (c *Controller) reconcileAllocateSubnets(cachedPod, pod *v1.Pod, needAlloca
719729
DHCPv4OptionsUUID: subnet.Status.DHCPv4OptionsUUID,
720730
DHCPv6OptionsUUID: subnet.Status.DHCPv6OptionsUUID,
721731
}
722-
723-
if err := c.OVNNbClient.CreateLogicalSwitchPort(subnet.Name, portName, ipStr, mac, podName, pod.Namespace, portSecurity, securityGroupAnnotation, vips, podNet.Subnet.Spec.EnableDHCP, dhcpOptions, subnet.Spec.Vpc); err != nil {
732+
if err := c.OVNNbClient.CreateLogicalSwitchPort(subnet.Name, portName, ipStr, mac, podName, pod.Namespace,
733+
portSecurity, securityGroupAnnotation, vips, podNet.Subnet.Spec.EnableDHCP, dhcpOptions, subnet.Spec.Vpc); err != nil {
724734
c.recorder.Eventf(pod, v1.EventTypeWarning, "CreateOVNPortFailed", err.Error())
725735
klog.Errorf("%v", err)
726736
return nil, err
727737
}
728738

739+
if isMigrate {
740+
if migrated {
741+
klog.Infof("migrate end reset options for lsp %s from %s to %s, migrated fail: %t", portName, srcNodeName, targetNodeName, migratedFail)
742+
if err := c.OVNNbClient.ResetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName, migratedFail); err != nil {
743+
err = fmt.Errorf("failed to clean migrate options for lsp %s, %v", portName, err)
744+
klog.Error(err)
745+
return nil, err
746+
}
747+
} else {
748+
klog.Infof("migrate start set options for lsp %s from %s to %s", portName, srcNodeName, targetNodeName)
749+
if err := c.OVNNbClient.SetLogicalSwitchPortMigrateOptions(portName, srcNodeName, targetNodeName); err != nil {
750+
err = fmt.Errorf("failed to set migrate options for lsp %s, %v", portName, err)
751+
klog.Error(err)
752+
return nil, err
753+
}
754+
}
755+
}
756+
729757
if pod.Annotations[fmt.Sprintf(util.Layer2ForwardAnnotationTemplate, podNet.ProviderName)] == "true" {
730758
if err := c.OVNNbClient.EnablePortLayer2forward(portName); err != nil {
731759
c.recorder.Eventf(pod, v1.EventTypeWarning, "SetOVNPortL2ForwardFailed", err.Error())
@@ -1008,6 +1036,19 @@ func (c *Controller) handleDeletePod(key string) error {
10081036
}
10091037
isVMPod, vmName := isVMPod(pod)
10101038
if isVMPod && c.config.EnableKeepVMIP {
1039+
ports, err := c.OVNNbClient.ListNormalLogicalSwitchPorts(true, map[string]string{"pod": podKey})
1040+
if err != nil {
1041+
klog.Errorf("failed to list lsps of pod '%s', %v", pod.Name, err)
1042+
return err
1043+
}
1044+
for _, port := range ports {
1045+
klog.Infof("clean migrate options for vm lsp %s", port.Name)
1046+
if err := c.OVNNbClient.CleanLogicalSwitchPortMigrateOptions(port.Name); err != nil {
1047+
err = fmt.Errorf("failed to clean migrate options for vm lsp %s, %v", port.Name, err)
1048+
klog.Error(err)
1049+
return err
1050+
}
1051+
}
10111052
vmToBeDel := c.isVMToDel(pod, vmName)
10121053
isDelete, err := appendCheckPodToDel(c, pod, vmName, util.VMInstance)
10131054
if pod.DeletionTimestamp != nil {
@@ -1341,6 +1382,9 @@ func getNextHopByTunnelIP(gw []net.IP) string {
13411382
}
13421383

13431384
func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
1385+
// check whether allocating from a subnet is needed.
1386+
// allocate subnet when change subnet to hotplug nic
1387+
// allocate subnet when migrating a vm
13441388
if !isPodAlive(pod) {
13451389
return nil
13461390
}
@@ -1349,9 +1393,15 @@ func needAllocateSubnets(pod *v1.Pod, nets []*kubeovnNet) []*kubeovnNet {
13491393
return nets
13501394
}
13511395

1396+
migrate := false
1397+
if job, ok := pod.Annotations[util.MigrationJobAnnotation]; ok {
1398+
klog.Infof("pod %s/%s is in the migration job %s", pod.Namespace, pod.Name, job)
1399+
migrate = true
1400+
}
1401+
13521402
result := make([]*kubeovnNet, 0, len(nets))
13531403
for _, n := range nets {
1354-
if pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, n.ProviderName)] != "true" {
1404+
if migrate || pod.Annotations[fmt.Sprintf(util.AllocatedAnnotationTemplate, n.ProviderName)] != "true" {
13551405
result = append(result, n)
13561406
}
13571407
}
@@ -2089,3 +2139,60 @@ func (c *Controller) getVirtualIPs(pod *v1.Pod, podNets []*kubeovnNet) map[strin
20892139
}
20902140
return vipsMap
20912141
}
2142+
2143+
// migrate vm return migrate, migrated, fail, src node, target node, err
2144+
func (c *Controller) migrateVM(pod *v1.Pod, vmKey string) (bool, bool, bool, string, string, error) {
2145+
// try optimize vm migration, no need return error
2146+
// migrate true means need ovn set migrate options
2147+
// migrated ok means need set migrate options to target node
2148+
// migrated failed means need set migrate options to source node
2149+
if _, ok := pod.Annotations[util.MigrationJobAnnotation]; !ok {
2150+
return false, false, false, "", "", nil
2151+
}
2152+
if _, ok := pod.Annotations[util.MigrationSourceAnnotation]; ok {
2153+
klog.Infof("will migrate out vm %s pod %s from source node %s", vmKey, pod.Name, pod.Spec.NodeName)
2154+
return false, false, false, "", "", nil
2155+
}
2156+
// ovn set migrator only in the process of target vm pod
2157+
if _, ok := pod.Annotations[util.MigrationTargetAnnotation]; !ok {
2158+
return false, false, false, "", "", nil
2159+
}
2160+
srcNode, ok := pod.Annotations[util.MigrationSourceNodeAnnotation]
2161+
if !ok || srcNode == "" {
2162+
err := fmt.Errorf("vm %s migration source node is not set", vmKey)
2163+
klog.Warning(err)
2164+
return false, false, false, "", "", nil
2165+
}
2166+
targetNode := pod.Spec.NodeName
2167+
if targetNode == "" {
2168+
err := fmt.Errorf("vm %s migration target node is not set", vmKey)
2169+
klog.Warning(err)
2170+
return false, false, false, "", "", nil
2171+
}
2172+
migratePhase, ok := pod.Annotations[util.MigrationPhaseAnnotation]
2173+
if !ok {
2174+
err := fmt.Errorf("vm %s migration phase is not set", vmKey)
2175+
klog.Warning(err)
2176+
return false, false, false, "", "", nil
2177+
}
2178+
// check migrate phase
2179+
if migratePhase == "" {
2180+
err := fmt.Errorf("vm %s migration phase is empty", vmKey)
2181+
klog.Warning(err)
2182+
return false, false, false, "", "", nil
2183+
}
2184+
if migratePhase == util.MigrationPhaseStarted {
2185+
klog.Infof("start to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
2186+
return true, false, false, srcNode, targetNode, nil
2187+
}
2188+
if migratePhase == util.MigrationPhaseSucceeded {
2189+
klog.Infof("succeed to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
2190+
return true, true, false, srcNode, targetNode, nil
2191+
}
2192+
if migratePhase == util.MigrationPhaseFailed {
2193+
klog.Infof("failed to migrate src vm %s from %s to %s", vmKey, srcNode, targetNode)
2194+
return true, true, true, srcNode, targetNode, nil
2195+
}
2196+
2197+
return false, false, false, "", "", nil
2198+
}

pkg/ovs/interface.go

+4
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ type LogicalSwitchPort interface {
9090
ListLogicalSwitchPortsWithLegacyExternalIDs() ([]ovnnb.LogicalSwitchPort, error)
9191
GetLogicalSwitchPort(lspName string, ignoreNotFound bool) (*ovnnb.LogicalSwitchPort, error)
9292
LogicalSwitchPortExists(name string) (bool, error)
93+
// vm live migrate
94+
SetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string) error
95+
ResetLogicalSwitchPortMigrateOptions(lspName, srcNodeName, targetNodeName string, migratedFail bool) error
96+
CleanLogicalSwitchPortMigrateOptions(lspName string) error
9397
}
9498

9599
type LoadBalancer interface {

pkg/ovs/ovn-nb-load_balancer.go

+3-6
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,9 @@ func (c *OVNNbClient) LoadBalancerDeleteVip(lbName, vipEndpoint string, ignoreHe
152152
klog.Errorf("failed to delete lb ip port mapping: %v", err)
153153
return err
154154
}
155-
156-
if lbhc != nil {
157-
if err = c.LoadBalancerDeleteHealthCheck(lbName, lbhc.UUID); err != nil {
158-
klog.Errorf("failed to delete lb health check: %v", err)
159-
return err
160-
}
155+
if err = c.LoadBalancerDeleteHealthCheck(lbName, lbhc.UUID); err != nil {
156+
klog.Errorf("failed to delete lb health check: %v", err)
157+
return err
161158
}
162159
}
163160
if lb == nil || len(lb.Vips) == 0 {

0 commit comments

Comments
 (0)