Auto merge of #2315 - saethlin:shrink-item, r=saethlin

Optimizing Stacked Borrows (part 2): Shrink Item This moves protectors out of `Item`, storing them both in a global `HashSet` which contains all currently-protected tags as well as a `Vec<SbTag>` on each `Frame` so that when we return from a function we know which tags to remove from the protected set. This also bit-packs the 64-bit tag and the 2-bit permission together when they are stored in memory. This means we theoretically run out of tags sooner, but I doubt that limit will ever be hit. Together these optimizations reduce the memory footprint of Miri when executing programs which stress Stacked Borrows by ~66%. For example, running a test with isolation off which only panics currently peaks at ~19 GB, with this PR it peaks at ~6.2 GB. To-do - [x] Enforce the 62-bit limit - [x] Decide if there is a better order to pack the tag and permission in - [x] Wait for `UnsafeCell` to become infectious, or express offsets + tags in the global protector set Benchmarks before: ``` Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/backtraces/Cargo.toml Time (mean ± σ): 8.948 s ± 0.253 s [User: 8.752 s, System: 0.158 s] Range (min … max): 8.619 s … 9.279 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/mse/Cargo.toml Time (mean ± σ): 2.129 s ± 0.037 s [User: 1.849 s, System: 0.248 s] Range (min … max): 2.086 s … 2.176 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/serde1/Cargo.toml Time (mean ± σ): 3.334 s ± 0.017 s [User: 3.211 s, System: 0.103 s] Range (min … max): 3.315 s … 3.352 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/serde2/Cargo.toml Time (mean ± σ): 3.316 s ± 0.038 s [User: 3.207 s, System: 0.095 s] Range (min … max): 3.282 s … 3.375 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/unicode/Cargo.toml Time (mean ± σ): 6.391 s ± 0.323 s [User: 5.928 s, System: 0.412 s] Range (min … max): 6.090 s … 6.917 s 5 runs ``` After: ``` Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/backtraces/Cargo.toml Time (mean ± σ): 6.955 s ± 0.051 s [User: 6.807 s, System: 0.132 s] Range (min … max): 6.900 s … 7.038 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/mse/Cargo.toml Time (mean ± σ): 1.784 s ± 0.012 s [User: 1.627 s, System: 0.156 s] Range (min … max): 1.772 s … 1.797 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/serde1/Cargo.toml Time (mean ± σ): 2.505 s ± 0.095 s [User: 2.311 s, System: 0.096 s] Range (min … max): 2.405 s … 2.603 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/serde2/Cargo.toml Time (mean ± σ): 2.449 s ± 0.031 s [User: 2.306 s, System: 0.100 s] Range (min … max): 2.395 s … 2.467 s 5 runs Benchmark 1: cargo +miri miri run --manifest-path bench-cargo-miri/unicode/Cargo.toml Time (mean ± σ): 3.667 s ± 0.110 s [User: 3.498 s, System: 0.140 s] Range (min … max): 3.564 s … 3.814 s 5 runs ``` The decrease in system time is probably due to spending less time in the page fault handler.
rust-lang · Jul 13, 2022 · db5a2b9 · db5a2b9
2 parents f223a5f + 4eff60a
commit db5a2b9
Show file tree

Hide file tree

Showing 20 changed files with 317 additions and 153 deletions.
diff --git a/src/lib.rs b/src/lib.rs
@@ -90,8 +90,8 @@ pub use crate::mono_hash_map::MonoHashMap;
 pub use crate::operator::EvalContextExt as OperatorEvalContextExt;
 pub use crate::range_map::RangeMap;
 pub use crate::stacked_borrows::{
-    stack::Stack, CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag,
-    SbTagExtra, Stacks,
+    CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, SbTagExtra, Stack,
+    Stacks,
 };
 pub use crate::sync::{CondvarId, EvalContextExt as SyncEvalContextExt, MutexId, RwLockId};
 pub use crate::thread::{

diff --git a/src/machine.rs b/src/machine.rs
@@ -5,7 +5,6 @@ use std::borrow::Cow;
 use std::cell::RefCell;
 use std::collections::HashSet;
 use std::fmt;
-use std::num::NonZeroU64;
 use std::time::Instant;
 
 use rand::rngs::StdRng;
@@ -43,7 +42,7 @@ pub const NUM_CPUS: u64 = 1;
 /// Extra data stored with each stack frame
 pub struct FrameData<'tcx> {
     /// Extra data for Stacked Borrows.
-    pub call_id: stacked_borrows::CallId,
+    pub stacked_borrows: Option<stacked_borrows::FrameExtra>,
 
     /// If this is Some(), then this is a special "catch unwind" frame (the frame of `try_fn`
     /// called by `try`). When this frame is popped during unwinding a panic,
@@ -59,9 +58,9 @@ pub struct FrameData<'tcx> {
 impl<'tcx> std::fmt::Debug for FrameData<'tcx> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         // Omitting `timing`, it does not support `Debug`.
-        let FrameData { call_id, catch_unwind, timing: _ } = self;
+        let FrameData { stacked_borrows, catch_unwind, timing: _ } = self;
         f.debug_struct("FrameData")
-            .field("call_id", call_id)
+            .field("stacked_borrows", stacked_borrows)
             .field("catch_unwind", catch_unwind)
             .finish()
     }
@@ -788,6 +787,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
                 range,
                 machine.stacked_borrows.as_ref().unwrap(),
                 machine.current_span(),
+                &machine.threads,
             )?;
         }
         if let Some(weak_memory) = &alloc_extra.weak_memory {
@@ -819,6 +819,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
                 range,
                 machine.stacked_borrows.as_ref().unwrap(),
                 machine.current_span(),
+                &machine.threads,
             )?;
         }
         if let Some(weak_memory) = &alloc_extra.weak_memory {
@@ -852,6 +853,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
                 tag,
                 range,
                 machine.stacked_borrows.as_ref().unwrap(),
+                &machine.threads,
             )
         } else {
             Ok(())
@@ -888,11 +890,12 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
         };
 
         let stacked_borrows = ecx.machine.stacked_borrows.as_ref();
-        let call_id = stacked_borrows.map_or(NonZeroU64::new(1).unwrap(), |stacked_borrows| {
-            stacked_borrows.borrow_mut().new_call()
-        });
 
-        let extra = FrameData { call_id, catch_unwind: None, timing };
+        let extra = FrameData {
+            stacked_borrows: stacked_borrows.map(|sb| sb.borrow_mut().new_frame()),
+            catch_unwind: None,
+            timing,
+        };
         Ok(frame.with_extra(extra))
     }
 
@@ -936,7 +939,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> {
     ) -> InterpResult<'tcx, StackPopJump> {
         let timing = frame.extra.timing.take();
         if let Some(stacked_borrows) = &ecx.machine.stacked_borrows {
-            stacked_borrows.borrow_mut().end_call(frame.extra.call_id);
+            stacked_borrows.borrow_mut().end_call(&frame.extra);
         }
         let res = ecx.handle_stack_pop_unwind(frame.extra, unwinding);
         if let Some(profiler) = ecx.machine.profiler.as_ref() {