diff --git a/.github/buildomat/jobs/test.sh b/.github/buildomat/jobs/test.sh index fc77c9ee..00262f91 100755 --- a/.github/buildomat/jobs/test.sh +++ b/.github/buildomat/jobs/test.sh @@ -81,7 +81,3 @@ pfexec add_drv xde banner "test" pfexec chmod +x /input/xde/work/test/loopback pfexec /input/xde/work/test/loopback --nocapture - -pfexec add_drv xde || true -pfexec chmod +x /input/xde/work/test/flowpin -pfexec /input/xde/work/test/flowpin --nocapture diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index 5f05b6bf..7d058360 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -133,10 +133,3 @@ loopback_test=$( ) mkdir -p /work/test cp $loopback_test /work/test/loopback - -cargo build --test flowpin -flowpin_test=$( - cargo build -q --test flowpin --message-format=json |\ - jq -r "select(.profile.test == true) | .filenames[]" -) -cp $flowpin_test /work/test/flowpin diff --git a/crates/opte-api/src/ip.rs b/crates/opte-api/src/ip.rs index c6988251..99cff17b 100644 --- a/crates/opte-api/src/ip.rs +++ b/crates/opte-api/src/ip.rs @@ -650,6 +650,14 @@ impl Ipv6Addr { self.inner } + /// Return true if this address shares at least one set bit with `prefix`. + /// + /// NOTE(review): this is a bit-overlap test, not a prefix comparison; use + /// `has_prefix` to match the leading bits of an address. + pub fn prefix_match(&self, prefix: u128) -> bool { + (u128::from_be_bytes(self.inner) & prefix) != 0 + } + /// Return the address after applying the network mask. pub fn mask(mut self, mask: u8) -> Result { if mask > 128 { @@ -703,6 +711,18 @@ impl Ipv6Addr { ], } } + + /// Return true if the leading `len` bits of this address equal `prefix`. + /// + /// `prefix` is left-aligned in its 128 bits; any bit set past `len` can + /// never match. A `len` greater than 128 is treated as 128. + pub fn has_prefix(&self, prefix: u128, len: u8) -> bool { + // `checked_shl` yields `None` for a 128-bit shift, so a zero-length + // prefix gets an empty mask instead of overflowing the shift. + let shift = 128u32.saturating_sub(u32::from(len)); + let mask = u128::MAX.checked_shl(shift).unwrap_or(0); + (mask & u128::from_be_bytes(self.inner)) == prefix + } } impl fmt::Display for Ipv6Addr { diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 4788aa50..6a0c4f8f 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -58,7 +58,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. 
-pub const API_VERSION: u64 = 25; +pub const API_VERSION: u64 = 24; #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] pub enum Direction { diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index 93911974..024be5d8 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -453,7 +453,7 @@ impl Display for RouterTarget { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Drop => write!(f, "Drop"), - Self::InternetGateway => write!(f, "ig"), + Self::InternetGateway => write!(f, "IG"), Self::Ip(IpAddr::Ip4(ip4)) => write!(f, "ip4={}", ip4), Self::Ip(IpAddr::Ip6(ip6)) => write!(f, "ip6={}", ip6), Self::VpcSubnet(IpCidr::Ip4(sub4)) => write!(f, "sub4={}", sub4), diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 4986cca9..c192948f 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -521,13 +521,26 @@ pub struct Virt2Phys { /// A mapping from virtual IPs to boundary services addresses. pub struct Virt2Boundary { + // The BTreeMap-based representation of the v2b table is a representation + // that is easily updated. ip4: KMutex>, ip6: KMutex>, + + // The Poptrie-based representation of the v2b table is a data structure + // optimized for fast query times. It's not easily updated in-place. It's + // rebuilt each time an update is made. The heuristic being applied here is + // we expect table churn to be highly infrequent compared to lookups. + // Lookups may happen millions of times per second and we want those to + // be as fast as possible. At the time of writing, poptrie is the fastest + // LPM lookup data structure known to the author. + // + // The poptrie is under a read-write lock to allow multiple concurrent + // readers. When we update we hold the lock just long enough to do a swap + // with a poptrie that was pre-built out of band. 
pt4: KRwLock>, pt6: KRwLock>, } -pub const VIRT_2_BOUNDARY_NAME: &str = "Virt2Boundary"; pub const BOUNDARY_SERVICES_VNI: u32 = 99u32; impl Virt2Boundary { @@ -607,16 +620,16 @@ impl Virt2Boundary { pub fn set( &self, vip: IpCidr, - phys: TunnelEndpoint, + tep: TunnelEndpoint, ) -> Option { match vip { IpCidr::Ip4(ip4) => { - let e = self.ip4.lock().insert(ip4, phys); + let e = self.ip4.lock().insert(ip4, tep); self.update_poptrie_v4(); e } IpCidr::Ip6(ip6) => { - let e = self.ip6.lock().insert(ip6, phys); + let e = self.ip6.lock().insert(ip6, tep); self.update_poptrie_v6(); e } diff --git a/preflight.sh b/preflight.sh index a21017b9..e3e652bc 100755 --- a/preflight.sh +++ b/preflight.sh @@ -1,5 +1,8 @@ #!/bin/bash +# This script can be helpful for catching issues locally before paying the CI +# tax. + ./.github/buildomat/jobs/opte.sh ./.github/buildomat/jobs/opteadm.sh ./.github/buildomat/jobs/test.sh diff --git a/xde-tests/tests/flowpin.rs b/xde-tests/tests/flowpin.rs deleted file mode 100644 index e65cd9ab..00000000 --- a/xde-tests/tests/flowpin.rs +++ /dev/null @@ -1,119 +0,0 @@ -use anyhow::Result; -use xde_tests::brand; -use xde_tests::run_topo; -use xde_tests::OptePort; -use xde_tests::OpteZone; -use xde_tests::SoftnpuZone; -use xde_tests::Xde; -use ztest::*; - -/// This topology tests OPTE flow pinning. The objective is to demonstrate that -/// overlay flows going through boundary services have an affinity to a physical -/// underlay path. -/// -/// There are two boundary switches and one upstream. In the test, the xde -/// device sends TCP, UDP and ICMP packets to the upstream through the boundary -/// switches acting as Geneve tunnel endpoints (GTEPs). 
-/// -/// ┌──────────┐ -/// │ upstream │ -/// └──┬────┬──┘ -/// ┌────┘ └────┐ -/// │ │ -/// ┌─────────┐ ┌─────────┐ -/// │boundary0│ │boundary1│ -/// └─────────┘ └─────────┘ -/// │ │ -/// └─────┐ ┌──────┘ -/// ┌─┴─┴─┐ -/// │ xde │ -/// └─────┘ -#[test] -fn test_xde_flow_pinning() -> Result<()> { - // Create underlay topology with simnet links. - let x0_b00 = SimnetLink::new("x0", "b0_0")?; - let x1_b10 = SimnetLink::new("x1", "b0_1")?; - let u0_b01 = SimnetLink::new("u0", "b1_0")?; - let u1_b11 = SimnetLink::new("u1", "b1_1")?; - - /* - let _x0ll = LinkLocal::new(&x0_b00.end_a, "ll")?; - let _x1ll = LinkLocal::new(&x1_b10.end_a, "ll")?; - let b00ll = LinkLocal::new(&x0_b00.end_b, "ll")?; - let b10ll = LinkLocal::new(&x1_b10.end_b, "ll")?; - */ - - Xde::set_xde_underlay(&x0_b00.end_a, &x1_b10.end_a)?; - // TODO this is a sort of force unset underlay until we have an unset - // underlay command. When this object drops it will remove the xde driver. - // If we do not do this, xde will hold references to the simnet devices - // preventing us from cleaning them up after this test. - let _xde = Xde {}; - - // Set up the virtual to physical mapptings for this test run. 
- Xde::set_v2p("10.0.0.1", "a8:40:25:ff:00:01", "fd44::1")?; - - let opte0 = - OptePort::new("opte0", "10.0.0.1", "a8:40:25:ff:00:01", "fd44::1")?; - opte0.add_router_entry("10.0.0.2")?; - opte0.fw_allow_all()?; - - //let _r0 = RouteV6::new(opte0.boundary_ip(), 64, b00ll.ip, Some(x0_b00.end_a))?; - //let _r1 = RouteV6::new(opte0.boundary_ip(), 64, b10ll.ip, Some(x1_b10.end_a))?; - - let vopte0 = Vnic::with_mac("vopte0", "opte0", opte0.mac())?; - - let zfs = Zfs::new("opteflowpin")?; - - println!("start xde zone"); - let sled = OpteZone::new("sled.flowpin", &zfs, &[&vopte0.name])?; - - println!("start boundary0 zone"); - let boundary0 = SoftnpuZone::new( - "boundary0.flowpin", - &zfs, - &[&x0_b00.end_b, &u0_b01.end_b], - )?; - - println!("start boundary1 zone"); - let boundary1 = SoftnpuZone::new( - "boundary1.flowpin", - &zfs, - &[&x1_b10.end_b, &u1_b11.end_b], - )?; - - println!("start upstream zone"); - let upstream = Zone::new( - "upstream.flowpin", - brand(), - &zfs, - &[&u0_b01.end_a, &u1_b11.end_a], - &[], - )?; - - println!("setup sled"); - sled.setup(&vopte0.name, opte0.ip())?; - - println!("setup boundary 0"); - boundary0.setup()?; - - println!("setup boundary 1"); - boundary1.setup()?; - - println!("setup upstream"); - upstream.wait_for_network()?; - - println!("run topology"); - run_topo!(run_flowpin_test(&sled, &boundary0, &boundary1, &upstream)?); - - Ok(()) -} - -fn run_flowpin_test( - _sled: &OpteZone<'_>, - _boundary0: &SoftnpuZone<'_>, - _boundary1: &SoftnpuZone<'_>, - _upstream: &Zone, -) -> Result<()> { - Ok(()) -} diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 52f2fabf..c6330e3d 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -107,9 +107,10 @@ const XDE_STR: *const c_char = b"xde\0".as_ptr() as *const c_char; /// Name of the control device. 
const XDE_CTL_STR: *const c_char = b"ctl\0".as_ptr() as *const c_char; -/// The boundary services anycast address fd00:99:: -const BOUNDARY_SERVICES_ACAST: Ipv6Addr = - Ipv6Addr::from_const([0xfd00, 0x99, 0, 0, 0, 0, 0, 0]); +// TODO: make configurable +/// The boundary services prefix fd00:99::/32, left-aligned in a u128. +const BOUNDARY_SERVICES_PREFIX: u128 = + 0xfd00_0099_0000_0000_0000_0000_0000_0000u128; /// Minor number for the control device. // Set once in `xde_attach`. @@ -1532,7 +1533,7 @@ unsafe extern "C" fn xde_mc_tx( return guest_loopback(src_dev, pkt, vni); } - let hash = if ip6.dst == BOUNDARY_SERVICES_ACAST { + let hash = if ip6.dst.has_prefix(BOUNDARY_SERVICES_PREFIX, 32) { match meta.inner.ip { Some(IpMeta::Ip4(m)) => Some(l4_hash!(meta, m)), Some(IpMeta::Ip6(m)) => Some(l4_hash!(meta, m)),