diff options
| author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2021-10-18 12:05:43 +0000 |
|---|---|---|
| committer | GitHub <[email protected]> | 2021-10-18 12:05:43 +0000 |
| commit | 729b17bc25fed42b4348cae0fb3d781590572c3f (patch) | |
| tree | 6ad5c5d3d181aaa879a3261136312eb1827720ba | |
| parent | b22c472af3a7e88c2855e6de216dcfa15ff155d1 (diff) | |
| parent | d32477f5a19496911a2a95b002bb7cddf5b9e605 (diff) | |
Merge #428
428: executor: Use critical sections instead of atomic CAS loops r=lulf a=Dirbaio
Optimize executor wakes.
CAS loops (either `fetch_update`, or manual `load + compare_exchange_weak`) generate surprisingly horrible code: https://godbolt.org/z/zhscnM1cb
This switches to using critical sections, which makes it faster. On thumbv6 (Cortex-M0) it should make it even faster, as it is currently using `atomic-polyfill`, which will make many critical sections for each `compare_exchange_weak` anyway.
```
opt-level=3 opt-level=s
atomics: 105 cycles           101 cycles
CS: 76 cycles 72 cycles
CS+inline: 72 cycles 64 cycles
```
Measured in nrf52 with icache disabled, with this code:
```rust
poll_fn(|cx| {
let task = unsafe { task_from_waker(cx.waker()) };
compiler_fence(Ordering::SeqCst);
let a = cortex_m::peripheral::DWT::get_cycle_count();
compiler_fence(Ordering::SeqCst);
unsafe { wake_task(task) }
compiler_fence(Ordering::SeqCst);
let b = cortex_m::peripheral::DWT::get_cycle_count();
compiler_fence(Ordering::SeqCst);
defmt::info!("cycles: {=u32}", b.wrapping_sub(a));
Poll::Ready(())
})
.await;
```
Co-authored-by: Dario Nieuwenhuis <[email protected]>
| -rw-r--r-- | embassy/src/executor/raw/mod.rs | 39 | ||||
| -rw-r--r-- | embassy/src/executor/raw/run_queue.rs | 19 |
2 files changed, 24 insertions, 34 deletions
diff --git a/embassy/src/executor/raw/mod.rs b/embassy/src/executor/raw/mod.rs index 08de77739..5ee38715d 100644 --- a/embassy/src/executor/raw/mod.rs +++ b/embassy/src/executor/raw/mod.rs | |||
| @@ -20,6 +20,7 @@ use core::pin::Pin; | |||
| 20 | use core::ptr::NonNull; | 20 | use core::ptr::NonNull; |
| 21 | use core::task::{Context, Poll}; | 21 | use core::task::{Context, Poll}; |
| 22 | use core::{mem, ptr}; | 22 | use core::{mem, ptr}; |
| 23 | use critical_section::CriticalSection; | ||
| 23 | 24 | ||
| 24 | use self::run_queue::{RunQueue, RunQueueItem}; | 25 | use self::run_queue::{RunQueue, RunQueueItem}; |
| 25 | use self::util::UninitCell; | 26 | use self::util::UninitCell; |
| @@ -71,30 +72,22 @@ impl TaskHeader { | |||
| 71 | } | 72 | } |
| 72 | 73 | ||
| 73 | pub(crate) unsafe fn enqueue(&self) { | 74 | pub(crate) unsafe fn enqueue(&self) { |
| 74 | let mut current = self.state.load(Ordering::Acquire); | 75 | critical_section::with(|cs| { |
| 75 | loop { | 76 | let state = self.state.load(Ordering::Relaxed); |
| 77 | |||
| 76 | // If already scheduled, or if not started, | 78 | // If already scheduled, or if not started, |
| 77 | if (current & STATE_RUN_QUEUED != 0) || (current & STATE_SPAWNED == 0) { | 79 | if (state & STATE_RUN_QUEUED != 0) || (state & STATE_SPAWNED == 0) { |
| 78 | return; | 80 | return; |
| 79 | } | 81 | } |
| 80 | 82 | ||
| 81 | // Mark it as scheduled | 83 | // Mark it as scheduled |
| 82 | let new = current | STATE_RUN_QUEUED; | 84 | self.state |
| 83 | 85 | .store(state | STATE_RUN_QUEUED, Ordering::Relaxed); | |
| 84 | match self.state.compare_exchange_weak( | ||
| 85 | current, | ||
| 86 | new, | ||
| 87 | Ordering::AcqRel, | ||
| 88 | Ordering::Acquire, | ||
| 89 | ) { | ||
| 90 | Ok(_) => break, | ||
| 91 | Err(next_current) => current = next_current, | ||
| 92 | } | ||
| 93 | } | ||
| 94 | 86 | ||
| 95 | // We have just marked the task as scheduled, so enqueue it. | 87 | // We have just marked the task as scheduled, so enqueue it. |
| 96 | let executor = &*self.executor.get(); | 88 | let executor = &*self.executor.get(); |
| 97 | executor.enqueue(self as *const TaskHeader as *mut TaskHeader); | 89 | executor.enqueue(cs, self as *const TaskHeader as *mut TaskHeader); |
| 90 | }) | ||
| 98 | } | 91 | } |
| 99 | } | 92 | } |
| 100 | 93 | ||
| @@ -264,8 +257,9 @@ impl Executor { | |||
| 264 | /// - `task` must be a valid pointer to a spawned task. | 257 | /// - `task` must be a valid pointer to a spawned task. |
| 265 | /// - `task` must be set up to run in this executor. | 258 | /// - `task` must be set up to run in this executor. |
| 266 | /// - `task` must NOT be already enqueued (in this executor or another one). | 259 | /// - `task` must NOT be already enqueued (in this executor or another one). |
| 267 | unsafe fn enqueue(&self, task: *mut TaskHeader) { | 260 | #[inline(always)] |
| 268 | if self.run_queue.enqueue(task) { | 261 | unsafe fn enqueue(&self, cs: CriticalSection, task: *mut TaskHeader) { |
| 262 | if self.run_queue.enqueue(cs, task) { | ||
| 269 | (self.signal_fn)(self.signal_ctx) | 263 | (self.signal_fn)(self.signal_ctx) |
| 270 | } | 264 | } |
| 271 | } | 265 | } |
| @@ -282,7 +276,10 @@ impl Executor { | |||
| 282 | pub(super) unsafe fn spawn(&'static self, task: NonNull<TaskHeader>) { | 276 | pub(super) unsafe fn spawn(&'static self, task: NonNull<TaskHeader>) { |
| 283 | let task = task.as_ref(); | 277 | let task = task.as_ref(); |
| 284 | task.executor.set(self); | 278 | task.executor.set(self); |
| 285 | self.enqueue(task as *const _ as _); | 279 | |
| 280 | critical_section::with(|cs| { | ||
| 281 | self.enqueue(cs, task as *const _ as _); | ||
| 282 | }) | ||
| 286 | } | 283 | } |
| 287 | 284 | ||
| 288 | /// Poll all queued tasks in this executor. | 285 | /// Poll all queued tasks in this executor. |
diff --git a/embassy/src/executor/raw/run_queue.rs b/embassy/src/executor/raw/run_queue.rs index 24624e1bd..7aa2079fa 100644 --- a/embassy/src/executor/raw/run_queue.rs +++ b/embassy/src/executor/raw/run_queue.rs | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | use atomic_polyfill::{AtomicPtr, Ordering}; | 1 | use atomic_polyfill::{AtomicPtr, Ordering}; |
| 2 | use core::ptr; | 2 | use core::ptr; |
| 3 | use core::ptr::NonNull; | 3 | use core::ptr::NonNull; |
| 4 | use critical_section::CriticalSection; | ||
| 4 | 5 | ||
| 5 | use super::TaskHeader; | 6 | use super::TaskHeader; |
| 6 | 7 | ||
| @@ -43,19 +44,11 @@ impl RunQueue { | |||
| 43 | /// # Safety | 44 | /// # Safety |
| 44 | /// | 45 | /// |
| 45 | /// `item` must NOT be already enqueued in any queue. | 46 | /// `item` must NOT be already enqueued in any queue. |
| 46 | pub(crate) unsafe fn enqueue(&self, task: *mut TaskHeader) -> bool { | 47 | #[inline(always)] |
| 47 | let mut prev = self.head.load(Ordering::Acquire); | 48 | pub(crate) unsafe fn enqueue(&self, _cs: CriticalSection, task: *mut TaskHeader) -> bool { |
| 48 | loop { | 49 | let prev = self.head.load(Ordering::Relaxed); |
| 49 | (*task).run_queue_item.next.store(prev, Ordering::Relaxed); | 50 | (*task).run_queue_item.next.store(prev, Ordering::Relaxed); |
| 50 | match self | 51 | self.head.store(task, Ordering::Relaxed); |
| 51 | .head | ||
| 52 | .compare_exchange_weak(prev, task, Ordering::AcqRel, Ordering::Acquire) | ||
| 53 | { | ||
| 54 | Ok(_) => break, | ||
| 55 | Err(next_prev) => prev = next_prev, | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | prev.is_null() | 52 | prev.is_null() |
| 60 | } | 53 | } |
| 61 | 54 | ||
