From 80972f1e0e6b6d409cc4d86202608c22e5ee3e5a Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Thu, 30 Mar 2023 17:55:55 +0200 Subject: executor,sync: add support for turbo-wakers. This is a `core` patch to make wakers 1 word (the task pointer) instead of 2 (task pointer + vtable). It allows having the "waker optimization" we had a while back on `WakerRegistration/AtomicWaker`, but EVERYWHERE, without patching all crates. Advantages: - Less memory usage. - Faster. - `AtomicWaker` can actually use atomics to load/store the waker, No critical section needed. - No `dyn` call, which means `cargo-call-stack` can now see through wakes. Disadvantages: - You have to patch `core`... - Breaks all executors and other things that create wakers, unless they opt in to using the new `from_ptr` API. How to use: - Run this shell script to patch `core`. https://gist.github.com/Dirbaio/c67da7cf318515181539122c9d32b395 - Enable `build-std` - Enable `build-std-features = core/turbowakers` - Enable feature `turbowakers` in `embassy-executor`, `embassy-sync`. - Make sure you have no other crate creating wakers other than `embassy-executor`. These will panic at runtime. Note that the patched `core` is equivalent to the unpached one when the `turbowakers` feature is not enabled, so it should be fine to leave it there. --- embassy-executor/src/raw/mod.rs | 1 + embassy-executor/src/raw/waker_turbo.rs | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 embassy-executor/src/raw/waker_turbo.rs (limited to 'embassy-executor/src/raw') diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs index 15ff18fc8..72c367c33 100644 --- a/embassy-executor/src/raw/mod.rs +++ b/embassy-executor/src/raw/mod.rs @@ -11,6 +11,7 @@ mod run_queue; #[cfg(feature = "integrated-timers")] mod timer_queue; pub(crate) mod util; +#[cfg_attr(feature = "turbowakers", path = "waker_turbo.rs")] mod waker; use core::future::Future; diff --git a/embassy-executor/src/raw/waker_turbo.rs b/embassy-executor/src/raw/waker_turbo.rs new file mode 100644 index 000000000..435a0ff7e --- /dev/null +++ b/embassy-executor/src/raw/waker_turbo.rs @@ -0,0 +1,34 @@ +use core::ptr::NonNull; +use core::task::Waker; + +use super::{wake_task, TaskHeader, TaskRef}; + +pub(crate) unsafe fn from_task(p: TaskRef) -> Waker { + Waker::from_turbo_ptr(NonNull::new_unchecked(p.as_ptr() as _)) +} + +/// Get a task pointer from a waker. +/// +/// This can be used as an optimization in wait queues to store task pointers +/// (1 word) instead of full Wakers (2 words). This saves a bit of RAM and helps +/// avoid dynamic dispatch. +/// +/// You can use the returned task pointer to wake the task with [`wake_task`](super::wake_task). +/// +/// # Panics +/// +/// Panics if the waker is not created by the Embassy executor. +pub fn task_from_waker(waker: &Waker) -> TaskRef { + let ptr = waker.as_turbo_ptr().as_ptr(); + + // safety: our wakers are always created with `TaskRef::as_ptr` + unsafe { TaskRef::from_ptr(ptr as *const TaskHeader) } +} + +#[inline(never)] +#[no_mangle] +fn _turbo_wake(ptr: NonNull<()>) { + // safety: our wakers are always created with `TaskRef::as_ptr` + let task = unsafe { TaskRef::from_ptr(ptr.as_ptr() as *const TaskHeader) }; + wake_task(task) +} -- cgit From d3c4e4a20a05085eae8d568c7efdbe09bada9cf5 Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Mon, 3 Apr 2023 01:18:27 +0200 Subject: executor: add Pender, rework Cargo features. This introduces a `Pender` struct with enum cases for thread-mode, interrupt-mode and custom callback executors. This avoids calls through function pointers when using only the thread or interrupt executors. Faster, and friendlier to `cargo-call-stack`. `embassy-executor` now has `arch-xxx` Cargo features to select the arch and to enable the builtin executors (thread and interrupt). --- embassy-executor/src/raw/mod.rs | 97 ++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 21 deletions(-) (limited to 'embassy-executor/src/raw') diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs index 72c367c33..f6c66da5a 100644 --- a/embassy-executor/src/raw/mod.rs +++ b/embassy-executor/src/raw/mod.rs @@ -19,7 +19,6 @@ use core::marker::PhantomData; use core::mem; use core::pin::Pin; use core::ptr::NonNull; -use core::sync::atomic::AtomicPtr; use core::task::{Context, Poll}; use atomic_polyfill::{AtomicU32, Ordering}; @@ -290,10 +289,60 @@ impl TaskPool { } } +#[derive(Clone, Copy)] +pub(crate) enum PenderInner { + #[cfg(feature = "executor-thread")] + Thread(crate::arch::ThreadPender), + #[cfg(feature = "executor-interrupt")] + Interrupt(crate::arch::InterruptPender), + #[cfg(feature = "pender-callback")] + Callback { func: fn(*mut ()), context: *mut () }, +} + +unsafe impl Send for PenderInner {} +unsafe impl Sync for PenderInner {} + +/// Platform/architecture-specific action executed when an executor has pending work. +/// +/// When a task within an executor is woken, the `Pender` is called. This does a +/// platform/architecture-specific action to signal there is pending work in the executor. +/// When this happens, you must arrange for [`Executor::poll`] to be called. +/// +/// You can think of it as a waker, but for the whole executor. +pub struct Pender(pub(crate) PenderInner); + +impl Pender { + /// Create a `Pender` that will call an arbitrary function pointer. + /// + /// # Arguments + /// + /// - `func`: The function pointer to call. + /// - `context`: Opaque context pointer, that will be passed to the function pointer. + #[cfg(feature = "pender-callback")] + pub fn new_from_callback(func: fn(*mut ()), context: *mut ()) -> Self { + Self(PenderInner::Callback { + func, + context: context.into(), + }) + } +} + +impl Pender { + pub(crate) fn pend(&self) { + match self.0 { + #[cfg(feature = "executor-thread")] + PenderInner::Thread(x) => x.pend(), + #[cfg(feature = "executor-interrupt")] + PenderInner::Interrupt(x) => x.pend(), + #[cfg(feature = "pender-callback")] + PenderInner::Callback { func, context } => func(context), + } + } +} + pub(crate) struct SyncExecutor { run_queue: RunQueue, - signal_fn: fn(*mut ()), - signal_ctx: AtomicPtr<()>, + pender: Pender, #[cfg(feature = "integrated-timers")] pub(crate) timer_queue: timer_queue::TimerQueue, @@ -302,16 +351,13 @@ pub(crate) struct SyncExecutor { } impl SyncExecutor { - pub(crate) fn new(signal_fn: fn(*mut ()), signal_ctx: *mut ()) -> Self { + pub(crate) fn new(pender: Pender) -> Self { #[cfg(feature = "integrated-timers")] let alarm = unsafe { unwrap!(driver::allocate_alarm()) }; - #[cfg(feature = "integrated-timers")] - driver::set_alarm_callback(alarm, signal_fn, signal_ctx); Self { run_queue: RunQueue::new(), - signal_fn, - signal_ctx: AtomicPtr::new(signal_ctx), + pender, #[cfg(feature = "integrated-timers")] timer_queue: timer_queue::TimerQueue::new(), @@ -332,10 +378,16 @@ impl SyncExecutor { trace::task_ready_begin(task.as_ptr() as u32); if self.run_queue.enqueue(cs, task) { - (self.signal_fn)(self.signal_ctx.load(Ordering::Relaxed)) + self.pender.pend(); } } + #[cfg(feature = "integrated-timers")] + fn alarm_callback(ctx: *mut ()) { + let this: &Self = unsafe { &*(ctx as *const Self) }; + this.pender.pend(); + } + pub(super) unsafe fn spawn(&'static self, task: TaskRef) { task.header().executor.set(Some(self)); @@ -351,6 +403,9 @@ impl SyncExecutor { /// /// Same as [`Executor::poll`], plus you must only call this on the thread this executor was created. pub(crate) unsafe fn poll(&'static self) { + #[cfg(feature = "integrated-timers")] + driver::set_alarm_callback(self.alarm, Self::alarm_callback, self as *const _ as *mut ()); + #[allow(clippy::never_loop)] loop { #[cfg(feature = "integrated-timers")] @@ -417,14 +472,14 @@ impl SyncExecutor { /// /// - To get the executor to do work, call `poll()`. This will poll all queued tasks (all tasks /// that "want to run"). -/// - You must supply a `signal_fn`. The executor will call it to notify you it has work +/// - You must supply a [`Pender`]. The executor will call it to notify you it has work /// to do. You must arrange for `poll()` to be called as soon as possible. /// -/// `signal_fn` can be called from *any* context: any thread, any interrupt priority +/// The [`Pender`] can be called from *any* context: any thread, any interrupt priority /// level, etc. It may be called synchronously from any `Executor` method call as well. /// You must deal with this correctly. /// -/// In particular, you must NOT call `poll` directly from `signal_fn`, as this violates +/// In particular, you must NOT call `poll` directly from the pender callback, as this violates /// the requirement for `poll` to not be called reentrantly. #[repr(transparent)] pub struct Executor { @@ -437,15 +492,15 @@ impl Executor { pub(crate) unsafe fn wrap(inner: &SyncExecutor) -> &Self { mem::transmute(inner) } + /// Create a new executor. /// - /// When the executor has work to do, it will call `signal_fn` with - /// `signal_ctx` as argument. + /// When the executor has work to do, it will call the [`Pender`]. /// - /// See [`Executor`] docs for details on `signal_fn`. - pub fn new(signal_fn: fn(*mut ()), signal_ctx: *mut ()) -> Self { + /// See [`Executor`] docs for details on `Pender`. + pub fn new(pender: Pender) -> Self { Self { - inner: SyncExecutor::new(signal_fn, signal_ctx), + inner: SyncExecutor::new(pender), _not_sync: PhantomData, } } @@ -468,16 +523,16 @@ impl Executor { /// This loops over all tasks that are queued to be polled (i.e. they're /// freshly spawned or they've been woken). Other tasks are not polled. /// - /// You must call `poll` after receiving a call to `signal_fn`. It is OK - /// to call `poll` even when not requested by `signal_fn`, but it wastes + /// You must call `poll` after receiving a call to the [`Pender`]. It is OK + /// to call `poll` even when not requested by the `Pender`, but it wastes /// energy. /// /// # Safety /// /// You must NOT call `poll` reentrantly on the same executor. /// - /// In particular, note that `poll` may call `signal_fn` synchronously. Therefore, you - /// must NOT directly call `poll()` from your `signal_fn`. Instead, `signal_fn` has to + /// In particular, note that `poll` may call the `Pender` synchronously. Therefore, you + /// must NOT directly call `poll()` from the `Pender` callback. Instead, the callback has to /// somehow schedule for `poll()` to be called later, at a time you know for sure there's /// no `poll()` already running. pub unsafe fn poll(&'static self) { -- cgit From 8290236ed64435453a9c028c95246a86371bd4ce Mon Sep 17 00:00:00 2001 From: Grant Miller Date: Sun, 2 Apr 2023 14:11:31 -0500 Subject: executor: Replace unsound critical sections with atomics --- embassy-executor/src/raw/mod.rs | 28 +++++++++++++--------------- embassy-executor/src/raw/run_queue.rs | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 21 deletions(-) (limited to 'embassy-executor/src/raw') diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs index f6c66da5a..bd0cff26b 100644 --- a/embassy-executor/src/raw/mod.rs +++ b/embassy-executor/src/raw/mod.rs @@ -22,7 +22,6 @@ use core::ptr::NonNull; use core::task::{Context, Poll}; use atomic_polyfill::{AtomicU32, Ordering}; -use critical_section::CriticalSection; #[cfg(feature = "integrated-timers")] use embassy_time::driver::{self, AlarmHandle}; #[cfg(feature = "integrated-timers")] @@ -373,11 +372,11 @@ impl SyncExecutor { /// - `task` must be set up to run in this executor. /// - `task` must NOT be already enqueued (in this executor or another one). #[inline(always)] - unsafe fn enqueue(&self, cs: CriticalSection, task: TaskRef) { + unsafe fn enqueue(&self, task: TaskRef) { #[cfg(feature = "rtos-trace")] trace::task_ready_begin(task.as_ptr() as u32); - if self.run_queue.enqueue(cs, task) { + if self.run_queue.enqueue(task) { self.pender.pend(); } } @@ -394,9 +393,7 @@ impl SyncExecutor { #[cfg(feature = "rtos-trace")] trace::task_new(task.as_ptr() as u32); - critical_section::with(|cs| { - self.enqueue(cs, task); - }) + self.enqueue(task); } /// # Safety @@ -552,24 +549,25 @@ impl Executor { /// /// You can obtain a `TaskRef` from a `Waker` using [`task_from_waker`]. pub fn wake_task(task: TaskRef) { - critical_section::with(|cs| { - let header = task.header(); - let state = header.state.load(Ordering::Relaxed); + let header = task.header(); + let res = header.state.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |state| { // If already scheduled, or if not started, if (state & STATE_RUN_QUEUED != 0) || (state & STATE_SPAWNED == 0) { - return; + None + } else { + // Mark it as scheduled + Some(state | STATE_RUN_QUEUED) } + }); - // Mark it as scheduled - header.state.store(state | STATE_RUN_QUEUED, Ordering::Relaxed); - + if res.is_ok() { // We have just marked the task as scheduled, so enqueue it. unsafe { let executor = header.executor.get().unwrap_unchecked(); - executor.enqueue(cs, task); + executor.enqueue(task); } - }) + } } #[cfg(feature = "integrated-timers")] diff --git a/embassy-executor/src/raw/run_queue.rs b/embassy-executor/src/raw/run_queue.rs index 362157535..a88174a0c 100644 --- a/embassy-executor/src/raw/run_queue.rs +++ b/embassy-executor/src/raw/run_queue.rs @@ -2,7 +2,6 @@ use core::ptr; use core::ptr::NonNull; use atomic_polyfill::{AtomicPtr, Ordering}; -use critical_section::CriticalSection; use super::{TaskHeader, TaskRef}; @@ -46,11 +45,18 @@ impl RunQueue { /// /// `item` must NOT be already enqueued in any queue. #[inline(always)] - pub(crate) unsafe fn enqueue(&self, _cs: CriticalSection, task: TaskRef) -> bool { - let prev = self.head.load(Ordering::Relaxed); - task.header().run_queue_item.next.store(prev, Ordering::Relaxed); - self.head.store(task.as_ptr() as _, Ordering::Relaxed); - prev.is_null() + pub(crate) unsafe fn enqueue(&self, task: TaskRef) -> bool { + let mut was_empty = false; + + self.head + .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |prev| { + was_empty = prev.is_null(); + task.header().run_queue_item.next.store(prev, Ordering::Relaxed); + Some(task.as_ptr() as *mut _) + }) + .ok(); + + was_empty } /// Empty the queue, then call `on_task` for each task that was in the queue. -- cgit