diff --git a/features.go b/features.go
index b636466bfe4..c5dff4a2d17 100644
--- a/features.go
+++ b/features.go
@@ -63,6 +63,9 @@ var featuresCommand = cli.Command{
 						Enabled: &t,
 					},
 				},
+				NetDevices: &features.NetDevices{
+					Enabled: &t,
+				},
 			},
 			PotentiallyUnsafeConfigAnnotations: []string{
 				"bundle",
diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go
index 746c7190eb7..555e7586825 100644
--- a/libcontainer/configs/config.go
+++ b/libcontainer/configs/config.go
@@ -115,6 +115,9 @@ type Config struct {
 	// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
 	Devices []*devices.Device `json:"devices"`
 
+	// NetDevices are key-value pairs, keyed by network device name, moved to the container's network namespace.
+	NetDevices map[string]*LinuxNetDevice `json:"netDevices"`
+
 	MountLabel string `json:"mount_label"`
 
 	// Hostname optionally sets the container's hostname if provided
diff --git a/libcontainer/configs/netdevices.go b/libcontainer/configs/netdevices.go
new file mode 100644
index 00000000000..09c38649e4e
--- /dev/null
+++ b/libcontainer/configs/netdevices.go
@@ -0,0 +1,7 @@
+package configs
+
+// LinuxNetDevice represents a single network device to be added to the container's network namespace.
+type LinuxNetDevice struct {
+	// Name of the device in the container namespace.
+	Name string `json:"name,omitempty"`
+}
diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go
index 7d17676ed18..c3f4f088fb3 100644
--- a/libcontainer/configs/validate/validator.go
+++ b/libcontainer/configs/validate/validator.go
@@ -24,6 +24,7 @@ func Validate(config *configs.Config) error {
 		cgroupsCheck,
 		rootfs,
 		network,
+		netdevices,
 		uts,
 		security,
 		namespaces,
@@ -70,6 +71,61 @@ func rootfs(config *configs.Config) error {
 	return nil
 }
 
+// devValidName reports whether name is acceptable to the kernel as a network
+// interface name. It mirrors dev_valid_name() in net/core/dev.c: the name must
+// be non-empty, must fit in IFNAMSIZ bytes including the terminating NUL, must
+// not be "." or "..", and must not contain '/', ':' or whitespace.
+//
+// https://elixir.bootlin.com/linux/v6.12/source/net/core/dev.c#L1066
+func devValidName(name string) bool {
+	// IFNAMSIZ counts the trailing NUL, so the longest usable name is
+	// IFNAMSIZ-1 bytes.
+	if len(name) == 0 || len(name) >= unix.IFNAMSIZ {
+		return false
+	}
+	if (name == ".") || (name == "..") {
+		return false
+	}
+	// The kernel rejects every isspace() character, not only ' '.
+	if strings.ContainsAny(name, "/: \t\n\v\f\r") {
+		return false
+	}
+	return true
+}
+
+// netdevices validates the network devices that are to be moved into the
+// container. It requires a NET namespace that is configured and not private,
+// rejects rootless configurations, and checks that both the map key and the
+// optional in-container name are valid interface names.
+func netdevices(config *configs.Config) error {
+	if len(config.NetDevices) == 0 {
+		return nil
+	}
+	if !config.Namespaces.Contains(configs.NEWNET) {
+		return errors.New("unable to move network devices without a NET namespace")
+	}
+	if config.Namespaces.IsPrivate(configs.NEWNET) {
+		return errors.New("unable to move network devices without a NET namespace path")
+	}
+	if config.RootlessEUID || config.RootlessCgroups {
+		return errors.New("network devices are not supported for rootless containers")
+	}
+
+	for name, netdev := range config.NetDevices {
+		if !devValidName(name) {
+			return fmt.Errorf("invalid network device name %q", name)
+		}
+		// A nil entry would be dereferenced later when the device is moved.
+		if netdev == nil {
+			return fmt.Errorf("missing configuration for network device %q", name)
+		}
+		if netdev.Name != "" && !devValidName(netdev.Name) {
+			return fmt.Errorf("invalid network device name %q", netdev.Name)
+		}
+	}
+	return nil
+}
+
 func network(config *configs.Config) error {
 	if !config.Namespaces.Contains(configs.NEWNET) {
 		if len(config.Networks) > 0 || len(config.Routes) > 0 {
diff --git a/libcontainer/configs/validate/validator_test.go b/libcontainer/configs/validate/validator_test.go
index d157feea5bc..fbbefebd625 100644
--- a/libcontainer/configs/validate/validator_test.go
+++ b/libcontainer/configs/validate/validator_test.go
@@ -3,6 +3,7 @@ package validate
 import (
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 
 	"github.com/opencontainers/runc/libcontainer/configs"
@@ -877,3 +878,174 @@ func TestValidateIOPriority(t *testing.T) {
 		}
 	}
 }
+
+// TestValidateNetDevices checks which NetDevices configurations Validate accepts.
+func TestValidateNetDevices(t *testing.T) {
+	testCases := []struct {
+		name   string
+		isErr  bool
+		config *configs.Config
+	}{
+		{
+			name: "network device",
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name: "network device rename",
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {
+						Name: "c0",
+					},
+				},
+			},
+		},
+		{
+			name:  "network device host network",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device network namespace empty",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device rootless EUID",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				RootlessEUID: true,
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device rootless",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				RootlessCgroups: true,
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {},
+				},
+			},
+		},
+		{
+			name:  "network device bad name",
+			isErr: true,
+			config: &configs.Config{
+				Namespaces: configs.Namespaces(
+					[]configs.Namespace{
+						{
+							Type: configs.NEWNET,
+							Path: "/var/run/netns/blue",
+						},
+					},
+				),
+				NetDevices: map[string]*configs.LinuxNetDevice{
+					"eth0": {
+						Name: "eth0/",
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			config := tc.config
+			config.Rootfs = "/var"
+
+			err := Validate(config)
+			if tc.isErr && err == nil {
+				t.Error("expected error, got nil")
+			}
+
+			if !tc.isErr && err != nil {
+				t.Error(err)
+			}
+		})
+	}
+}
+
+// TestDevValidName pins the interface-name rules, including the IFNAMSIZ boundary.
+func TestDevValidName(t *testing.T) {
+	testCases := []struct {
+		name  string
+		valid bool
+	}{
+		{name: "", valid: false},
+		{name: "a", valid: true},
+		{name: strings.Repeat("a", unix.IFNAMSIZ-1), valid: true},
+		{name: strings.Repeat("a", unix.IFNAMSIZ), valid: false},
+		{name: ".", valid: false},
+		{name: "..", valid: false},
+		{name: "dev/null", valid: false},
+		{name: "valid:name", valid: false},
+		{name: "valid name", valid: false},
+		{name: "valid\tname", valid: false},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			if devValidName(tc.name) != tc.valid {
+				t.Fatalf("name %q, expected valid: %v", tc.name, tc.valid)
+			}
+		})
+	}
+}
diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go
index 114cbf20e84..ba942d771a4 100644
--- a/libcontainer/factory_linux.go
+++ b/libcontainer/factory_linux.go
@@ -91,6 +91,19 @@ func Create(root, id string, config *configs.Config) (*Container, error) {
 	if err := os.Mkdir(stateDir, 0o711); err != nil {
 		return nil, err
 	}
+
+	// Move the specified devices to the container network namespace.
+	if nsPath := config.Namespaces.PathOf(configs.NEWNET); nsPath != "" {
+		for name, netDevice := range config.NetDevices {
+			if err := netnsAttach(name, nsPath, *netDevice); err != nil {
+				// Best-effort cleanup of the state directory created above
+				// so that a failed attach does not leave it behind.
+				_ = os.RemoveAll(stateDir)
+				return nil, err
+			}
+		}
+	}
+
 	c := &Container{
 		id:              id,
 		stateDir:        stateDir,
diff --git a/libcontainer/network_linux.go b/libcontainer/network_linux.go
index 8915548b3bc..67b3f0095d6 100644
--- a/libcontainer/network_linux.go
+++ b/libcontainer/network_linux.go
@@ -9,7 +9,12 @@ import (
 
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/types"
+	"github.com/sirupsen/logrus"
 	"github.com/vishvananda/netlink"
+	"github.com/vishvananda/netlink/nl"
+	"github.com/vishvananda/netns"
+
+	"golang.org/x/sys/unix"
 )
 
 var strategies = map[string]networkStrategy{
@@ -98,3 +103,167 @@ func (l *loopback) attach(n *configs.Network) (err error) {
 func (l *loopback) detach(n *configs.Network) (err error) {
 	return nil
 }
+
+// netnsAttach moves the network device called name from the current network
+// namespace into the namespace referenced by nsPath, renaming it to
+// device.Name when that is set. The device is set down before the move and
+// set up again inside the target namespace.
+func netnsAttach(name string, nsPath string, device configs.LinuxNetDevice) error {
+	logrus.Debugf("attaching network device %s with attrs %#v to network namespace %s", name, device, nsPath)
+	link, err := netlink.LinkByName(name)
+	if err != nil {
+		return fmt.Errorf("link not found for interface %s on runtime namespace: %w", name, err)
+	}
+
+	// set the interface down to change the attributes safely
+	err = netlink.LinkSetDown(link)
+	if err != nil {
+		return fmt.Errorf("fail to set link down: %w", err)
+	}
+
+	// do interface rename and namespace change in the same operation to avoid
+	// possible conflicts with the interface name.
+	flags := unix.NLM_F_REQUEST | unix.NLM_F_ACK
+	req := nl.NewNetlinkRequest(unix.RTM_NEWLINK, flags)
+
+	// Get a netlink socket in current namespace
+	s, err := nl.GetNetlinkSocketAt(netns.None(), netns.None(), unix.NETLINK_ROUTE)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace handle: %w", err)
+	}
+	// The request does not close a caller-supplied socket, release it here.
+	defer s.Close()
+	req.Sockets = map[int]*nl.SocketHandle{
+		unix.NETLINK_ROUTE: {Socket: s},
+	}
+
+	// set the interface index
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
+	msg.Index = int32(link.Attrs().Index)
+	req.AddData(msg)
+
+	// set the interface name, rename if requested
+	newName := name
+	if device.Name != "" {
+		newName = device.Name
+	}
+	nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(newName))
+	req.AddData(nameData)
+
+	// set the new network namespace
+	ns, err := netns.GetFromPath(nsPath)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace from path %s for network device %s : %w", nsPath, name, err)
+	}
+	defer ns.Close()
+
+	val := nl.Uint32Attr(uint32(ns))
+	attr := nl.NewRtAttr(unix.IFLA_NET_NS_FD, val)
+	req.AddData(attr)
+
+	_, err = req.Execute(unix.NETLINK_ROUTE, 0)
+	if err != nil {
+		return fmt.Errorf("fail to move network device %s to network namespace %s: %w", name, nsPath, err)
+	}
+
+	// to avoid golang problem with goroutines we create the socket in the
+	// namespace and use it directly
+	nhNs, err := netlink.NewHandleAt(ns)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace handle: %w", err)
+	}
+	defer nhNs.Close()
+
+	nsLink, err := nhNs.LinkByName(newName)
+	if err != nil {
+		return fmt.Errorf("link not found for interface %s on namespace %s : %w", newName, nsPath, err)
+	}
+
+	err = nhNs.LinkSetUp(nsLink)
+	if err != nil {
+		return fmt.Errorf("fail to set up interface %s on namespace %s: %w", nsLink.Attrs().Name, nsPath, err)
+	}
+
+	return nil
+}
+
+// netnsDetach takes the network device referenced by name in the passed network namespace
+// and moves it to the current network namespace, restoring the original name. It also sets
+// down the network device to avoid conflict with existing network configuration.
+func netnsDetach(name string, nsPath string, device configs.LinuxNetDevice) error {
+	logrus.Debugf("detaching network device %s with attrs %#v from network namespace %s", name, device, nsPath)
+	ns, err := netns.GetFromPath(nsPath)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace from path %s for network device %s : %w", nsPath, name, err)
+	}
+	defer ns.Close()
+	// to avoid golang problem with goroutines we create the socket in the
+	// namespace and use it directly
+	nhNs, err := netlink.NewHandleAt(ns)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace handle: %w", err)
+	}
+	defer nhNs.Close()
+
+	// the device is moved back to the namespace we are currently running in
+	rootNs, err := netns.Get()
+	if err != nil {
+		return fmt.Errorf("could not get current network namespace handle: %w", err)
+	}
+	defer rootNs.Close()
+
+	// check the interface name, it could have been renamed
+	devName := device.Name
+	if devName == "" {
+		devName = name
+	}
+
+	nsLink, err := nhNs.LinkByName(devName)
+	if err != nil {
+		return fmt.Errorf("link not found for interface %s on namespace %s: %w", devName, nsPath, err)
+	}
+
+	// set the device down to avoid network conflicts
+	// when it is restored to the original namespace
+	err = nhNs.LinkSetDown(nsLink)
+	if err != nil {
+		return fmt.Errorf("fail to set interface down: %w", err)
+	}
+
+	// do interface rename and namespace change in the same operation to avoid
+	// possible conflicts with the interface name.
+	flags := unix.NLM_F_REQUEST | unix.NLM_F_ACK
+	req := nl.NewNetlinkRequest(unix.RTM_NEWLINK, flags)
+
+	// Get a netlink socket in the namespace that currently holds the device
+	s, err := nl.GetNetlinkSocketAt(ns, rootNs, unix.NETLINK_ROUTE)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace handle: %w", err)
+	}
+	defer s.Close()
+	req.Sockets = map[int]*nl.SocketHandle{
+		unix.NETLINK_ROUTE: {Socket: s},
+	}
+
+	// set the interface index
+	msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
+	msg.Index = int32(nsLink.Attrs().Index)
+	req.AddData(msg)
+
+	// restore the original name if it was renamed
+	if nsLink.Attrs().Name != name {
+		nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name))
+		req.AddData(nameData)
+	}
+
+	val := nl.Uint32Attr(uint32(rootNs))
+	attr := nl.NewRtAttr(unix.IFLA_NET_NS_FD, val)
+	req.AddData(attr)
+
+	_, err = req.Execute(unix.NETLINK_ROUTE, 0)
+	if err != nil {
+		return fmt.Errorf("fail to move back interface to current namespace: %w", err)
+	}
+
+	return nil
+}
diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go
index c8446a56a10..ebacae1cf82 100644
--- a/libcontainer/specconv/spec_linux.go
+++ b/libcontainer/specconv/spec_linux.go
@@ -489,6 +489,14 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
 			}
 		}
 
+		for name, netdev := range spec.Linux.NetDevices {
+			if config.NetDevices == nil {
+				config.NetDevices = make(map[string]*configs.LinuxNetDevice)
+			}
+			config.NetDevices[name] = &configs.LinuxNetDevice{
+				Name: netdev.Name,
+			}
+		}
 	}
 
 	// Set the host UID that should own the container's cgroup.
diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go
index c8d4ae464da..d20fd89625d 100644
--- a/libcontainer/specconv/spec_linux_test.go
+++ b/libcontainer/specconv/spec_linux_test.go
@@ -956,3 +956,65 @@ func TestCreateDevices(t *testing.T) {
 		t.Errorf("device /dev/ram0 not found in config devices; got %v", conf.Devices)
 	}
 }
+
+// TestCreateNetDevices checks that the spec's network devices are copied into the libcontainer config.
+func TestCreateNetDevices(t *testing.T) {
+	testCases := []struct {
+		name       string
+		netDevices map[string]specs.LinuxNetDevice
+	}{
+		{
+			name: "no network devices",
+		},
+		{
+			name: "one network devices",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {},
+			},
+		},
+		{
+			name: "multiple network devices",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {},
+				"eth2": {},
+			},
+		},
+		{
+			name: "multiple network devices and rename",
+			netDevices: map[string]specs.LinuxNetDevice{
+				"eth1": {},
+				"eth2": {
+					Name: "ctr_eth2",
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			spec := Example()
+			spec.Linux.NetDevices = tc.netDevices
+			opts := &CreateOpts{
+				CgroupName:       "ContainerID",
+				UseSystemdCgroup: false,
+				Spec:             spec,
+			}
+			config, err := CreateLibcontainerConfig(opts)
+			if err != nil {
+				t.Fatalf("Couldn't create libcontainer config: %v", err)
+			}
+			if len(config.NetDevices) != len(opts.Spec.Linux.NetDevices) {
+				t.Fatalf("expected %d network devices and got %d", len(opts.Spec.Linux.NetDevices), len(config.NetDevices))
+			}
+			for name, netdev := range tc.netDevices {
+				ctrNetDev, ok := config.NetDevices[name]
+				if !ok {
+					t.Fatalf("network device %s not found in the configuration", name)
+				}
+				if ctrNetDev.Name != netdev.Name {
+					t.Fatalf("expected %s got %s", netdev.Name, ctrNetDev.Name)
+				}
+			}
+		})
+	}
+}
diff --git a/libcontainer/state_linux.go b/libcontainer/state_linux.go
index 64f43e8fbae..75d598224a2 100644
--- a/libcontainer/state_linux.go
+++ b/libcontainer/state_linux.go
@@ -8,6 +8,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 
@@ -48,6 +49,22 @@ func destroy(c *Container) error {
 		// Likely to fail when c.config.RootlessCgroups is true
 		_ = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
 	}
+
+	// restore network devices
+	nsPath := c.config.Namespaces.PathOf(configs.NEWNET)
+
+	if nsPath != "" {
+		for name, netDevice := range c.config.NetDevices {
+			err := netnsDetach(name, nsPath, *netDevice)
+			if err != nil {
+				// don't fail on the interface detachment to avoid problems with the container shutdown.
+				// In the worst case the OS will handle the cleanup, hardware interfaces will be back on the
+				// root namespace and virtual devices will be destroyed.
+				logrus.WithError(err).Warnf("failed to restore network device %s from network namespace %s", name, nsPath)
+			}
+		}
+	}
+
 	if err := c.cgroupManager.Destroy(); err != nil {
 		return fmt.Errorf("unable to remove container's cgroup: %w", err)
 	}
diff --git a/tests/integration/netdev.bats b/tests/integration/netdev.bats
new file mode 100644
index 00000000000..38444f01525
--- /dev/null
+++ b/tests/integration/netdev.bats
@@ -0,0 +1,66 @@
+#!/usr/bin/env bats
+
+load helpers
+
+function setup() {
+	requires root
+	setup_busybox
+	# create a dummy interface to move to the container
+	ip link add dummy0 type dummy
+	ip link set up dev dummy0
+	ip addr add 169.254.169.13/32 dev dummy0
+}
+
+function teardown() {
+	ip link del dev dummy0
+	teardown_bundle
+}
+
+@test "move network device to container network namespace" {
+	update_config ' .linux.netDevices |= {"dummy0": {} }
+		| .process.args |= ["ip", "address", "show", "dev", "dummy0"]'
+
+	# create a temporary name for the test network namespace
+	tmp=$(mktemp)
+	rm -f "$tmp"
+	ns_name=$(basename "$tmp")
+	# create network namespace
+	ip netns add "$ns_name"
+	ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/')
+
+	# tell runc which network namespace to use
+	update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"'
+
+	runc run test_busybox
+	[ "$status" -eq 0 ]
+
+	ip netns del "$ns_name"
+}
+
+@test "move network device to container network namespace and rename" {
+	update_config ' .linux.netDevices |= { "dummy0": { "name" : "ctr_dummy0" } }
+		| .process.args |= ["ip", "address", "show", "dev", "ctr_dummy0"]'
+
+	# create a temporary name for the test network namespace
+	tmp=$(mktemp)
+	rm -f "$tmp"
+	ns_name=$(basename "$tmp")
+	# create network namespace
+	ip netns add "$ns_name"
+	ns_path=$(ip netns add "$ns_name" 2>&1 | sed -e 's/.*"\(.*\)".*/\1/')
+
+	# tell runc which network namespace to use
+	update_config '(.. | select(.type? == "network")) .path |= "'"$ns_path"'"'
+
+	runc run test_busybox
+	[ "$status" -eq 0 ]
+
+	ip netns del "$ns_name"
+}
+
+@test "network device on root namespace fails" {
+	update_config ' .linux.netDevices |= {"dummy0": {} }'
+	runc run test_busybox
+	[ "$status" -ne 0 ]
+	[[ "$output" == *"unable to move network devices without a NET namespace"* ]]
+}