From 80972f1e0e6b6d409cc4d86202608c22e5ee3e5a Mon Sep 17 00:00:00 2001
From: Dario Nieuwenhuis <dirbaio@dirbaio.net>
Date: Thu, 30 Mar 2023 17:55:55 +0200
Subject: executor,sync: add support for turbo-wakers.

This is a `core` patch to make wakers 1 word (the task pointer) instead of 2 (task pointer + vtable). It allows having the "waker optimization" we had a while back on `WakerRegistration/AtomicWaker`, but EVERYWHERE, without patching all crates.

Advantages:
- Less memory usage.
- Faster.
- `AtomicWaker` can actually use atomics to load/store the waker, No critical section needed.
- No `dyn` call, which means `cargo-call-stack` can now see through wakes.

Disadvantages:
- You have to patch `core`...
- Breaks all executors and other things that create wakers, unless they opt in to using the new `from_ptr` API.

How to use:

- Run this shell script to patch `core`. https://gist.github.com/Dirbaio/c67da7cf318515181539122c9d32b395
- Enable `build-std`
- Enable `build-std-features = core/turbowakers`
- Enable feature `turbowakers` in `embassy-executor`, `embassy-sync`.
- Make sure you have no other crate creating wakers other than `embassy-executor`. These will panic at runtime.

Note that the patched `core` is equivalent to the unpached one when the `turbowakers` feature is not enabled, so it should be fine to leave it there.
---
 embassy-executor/src/raw/mod.rs         |  1 +
 embassy-executor/src/raw/waker_turbo.rs | 34 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 embassy-executor/src/raw/waker_turbo.rs

(limited to 'embassy-executor/src/raw')

diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs
index 15ff18fc8..72c367c33 100644
--- a/embassy-executor/src/raw/mod.rs
+++ b/embassy-executor/src/raw/mod.rs
@@ -11,6 +11,7 @@ mod run_queue;
 #[cfg(feature = "integrated-timers")]
 mod timer_queue;
 pub(crate) mod util;
+#[cfg_attr(feature = "turbowakers", path = "waker_turbo.rs")]
 mod waker;
 
 use core::future::Future;
diff --git a/embassy-executor/src/raw/waker_turbo.rs b/embassy-executor/src/raw/waker_turbo.rs
new file mode 100644
index 000000000..435a0ff7e
--- /dev/null
+++ b/embassy-executor/src/raw/waker_turbo.rs
@@ -0,0 +1,34 @@
+use core::ptr::NonNull;
+use core::task::Waker;
+
+use super::{wake_task, TaskHeader, TaskRef};
+
+pub(crate) unsafe fn from_task(p: TaskRef) -> Waker {
+    Waker::from_turbo_ptr(NonNull::new_unchecked(p.as_ptr() as _))
+}
+
+/// Get a task pointer from a waker.
+///
+/// This can be used as an optimization in wait queues to store task pointers
+/// (1 word) instead of full Wakers (2 words). This saves a bit of RAM and helps
+/// avoid dynamic dispatch.
+///
+/// You can use the returned task pointer to wake the task with [`wake_task`](super::wake_task).
+///
+/// # Panics
+///
+/// Panics if the waker is not created by the Embassy executor.
+pub fn task_from_waker(waker: &Waker) -> TaskRef {
+    let ptr = waker.as_turbo_ptr().as_ptr();
+
+    // safety: our wakers are always created with `TaskRef::as_ptr`
+    unsafe { TaskRef::from_ptr(ptr as *const TaskHeader) }
+}
+
+#[inline(never)]
+#[no_mangle]
+fn _turbo_wake(ptr: NonNull<()>) {
+    // safety: our wakers are always created with `TaskRef::as_ptr`
+    let task = unsafe { TaskRef::from_ptr(ptr.as_ptr() as *const TaskHeader) };
+    wake_task(task)
+}
-- 
cgit 


From d3c4e4a20a05085eae8d568c7efdbe09bada9cf5 Mon Sep 17 00:00:00 2001
From: Dario Nieuwenhuis <dirbaio@dirbaio.net>
Date: Mon, 3 Apr 2023 01:18:27 +0200
Subject: executor: add Pender, rework Cargo features.

This introduces a `Pender` struct with enum cases for thread-mode, interrupt-mode and
custom callback executors. This avoids calls through function pointers when using only
the thread or interrupt executors. Faster, and friendlier to `cargo-call-stack`.

`embassy-executor` now has `arch-xxx` Cargo features to select the arch and to enable
the builtin executors (thread and interrupt).
---
 embassy-executor/src/raw/mod.rs | 97 ++++++++++++++++++++++++++++++++---------
 1 file changed, 76 insertions(+), 21 deletions(-)

(limited to 'embassy-executor/src/raw')

diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs
index 72c367c33..f6c66da5a 100644
--- a/embassy-executor/src/raw/mod.rs
+++ b/embassy-executor/src/raw/mod.rs
@@ -19,7 +19,6 @@ use core::marker::PhantomData;
 use core::mem;
 use core::pin::Pin;
 use core::ptr::NonNull;
-use core::sync::atomic::AtomicPtr;
 use core::task::{Context, Poll};
 
 use atomic_polyfill::{AtomicU32, Ordering};
@@ -290,10 +289,60 @@ impl<F: Future + 'static, const N: usize> TaskPool<F, N> {
     }
 }
 
+#[derive(Clone, Copy)]
+pub(crate) enum PenderInner {
+    #[cfg(feature = "executor-thread")]
+    Thread(crate::arch::ThreadPender),
+    #[cfg(feature = "executor-interrupt")]
+    Interrupt(crate::arch::InterruptPender),
+    #[cfg(feature = "pender-callback")]
+    Callback { func: fn(*mut ()), context: *mut () },
+}
+
+unsafe impl Send for PenderInner {}
+unsafe impl Sync for PenderInner {}
+
+/// Platform/architecture-specific action executed when an executor has pending work.
+///
+/// When a task within an executor is woken, the `Pender` is called. This does a
+/// platform/architecture-specific action to signal there is pending work in the executor.
+/// When this happens, you must arrange for [`Executor::poll`] to be called.
+///
+/// You can think of it as a waker, but for the whole executor.
+pub struct Pender(pub(crate) PenderInner);
+
+impl Pender {
+    /// Create a `Pender` that will call an arbitrary function pointer.
+    ///
+    /// # Arguments
+    ///
+    /// - `func`: The function pointer to call.
+    /// - `context`: Opaque context pointer, that will be passed to the function pointer.
+    #[cfg(feature = "pender-callback")]
+    pub fn new_from_callback(func: fn(*mut ()), context: *mut ()) -> Self {
+        Self(PenderInner::Callback {
+            func,
+            context: context.into(),
+        })
+    }
+}
+
+impl Pender {
+    pub(crate) fn pend(&self) {
+        match self.0 {
+            #[cfg(feature = "executor-thread")]
+            PenderInner::Thread(x) => x.pend(),
+            #[cfg(feature = "executor-interrupt")]
+            PenderInner::Interrupt(x) => x.pend(),
+            #[cfg(feature = "pender-callback")]
+            PenderInner::Callback { func, context } => func(context),
+        }
+    }
+}
+
 pub(crate) struct SyncExecutor {
     run_queue: RunQueue,
-    signal_fn: fn(*mut ()),
-    signal_ctx: AtomicPtr<()>,
+    pender: Pender,
 
     #[cfg(feature = "integrated-timers")]
     pub(crate) timer_queue: timer_queue::TimerQueue,
@@ -302,16 +351,13 @@ pub(crate) struct SyncExecutor {
 }
 
 impl SyncExecutor {
-    pub(crate) fn new(signal_fn: fn(*mut ()), signal_ctx: *mut ()) -> Self {
+    pub(crate) fn new(pender: Pender) -> Self {
         #[cfg(feature = "integrated-timers")]
         let alarm = unsafe { unwrap!(driver::allocate_alarm()) };
-        #[cfg(feature = "integrated-timers")]
-        driver::set_alarm_callback(alarm, signal_fn, signal_ctx);
 
         Self {
             run_queue: RunQueue::new(),
-            signal_fn,
-            signal_ctx: AtomicPtr::new(signal_ctx),
+            pender,
 
             #[cfg(feature = "integrated-timers")]
             timer_queue: timer_queue::TimerQueue::new(),
@@ -332,10 +378,16 @@ impl SyncExecutor {
         trace::task_ready_begin(task.as_ptr() as u32);
 
         if self.run_queue.enqueue(cs, task) {
-            (self.signal_fn)(self.signal_ctx.load(Ordering::Relaxed))
+            self.pender.pend();
         }
     }
 
+    #[cfg(feature = "integrated-timers")]
+    fn alarm_callback(ctx: *mut ()) {
+        let this: &Self = unsafe { &*(ctx as *const Self) };
+        this.pender.pend();
+    }
+
     pub(super) unsafe fn spawn(&'static self, task: TaskRef) {
         task.header().executor.set(Some(self));
 
@@ -351,6 +403,9 @@ impl SyncExecutor {
     ///
     /// Same as [`Executor::poll`], plus you must only call this on the thread this executor was created.
     pub(crate) unsafe fn poll(&'static self) {
+        #[cfg(feature = "integrated-timers")]
+        driver::set_alarm_callback(self.alarm, Self::alarm_callback, self as *const _ as *mut ());
+
         #[allow(clippy::never_loop)]
         loop {
             #[cfg(feature = "integrated-timers")]
@@ -417,14 +472,14 @@ impl SyncExecutor {
 ///
 /// - To get the executor to do work, call `poll()`. This will poll all queued tasks (all tasks
 ///   that "want to run").
-/// - You must supply a `signal_fn`. The executor will call it to notify you it has work
+/// - You must supply a [`Pender`]. The executor will call it to notify you it has work
 ///   to do. You must arrange for `poll()` to be called as soon as possible.
 ///
-/// `signal_fn` can be called from *any* context: any thread, any interrupt priority
+/// The [`Pender`] can be called from *any* context: any thread, any interrupt priority
 /// level, etc. It may be called synchronously from any `Executor` method call as well.
 /// You must deal with this correctly.
 ///
-/// In particular, you must NOT call `poll` directly from `signal_fn`, as this violates
+/// In particular, you must NOT call `poll` directly from the pender callback, as this violates
 /// the requirement for `poll` to not be called reentrantly.
 #[repr(transparent)]
 pub struct Executor {
@@ -437,15 +492,15 @@ impl Executor {
     pub(crate) unsafe fn wrap(inner: &SyncExecutor) -> &Self {
         mem::transmute(inner)
     }
+
     /// Create a new executor.
     ///
-    /// When the executor has work to do, it will call `signal_fn` with
-    /// `signal_ctx` as argument.
+    /// When the executor has work to do, it will call the [`Pender`].
     ///
-    /// See [`Executor`] docs for details on `signal_fn`.
-    pub fn new(signal_fn: fn(*mut ()), signal_ctx: *mut ()) -> Self {
+    /// See [`Executor`] docs for details on `Pender`.
+    pub fn new(pender: Pender) -> Self {
         Self {
-            inner: SyncExecutor::new(signal_fn, signal_ctx),
+            inner: SyncExecutor::new(pender),
             _not_sync: PhantomData,
         }
     }
@@ -468,16 +523,16 @@ impl Executor {
     /// This loops over all tasks that are queued to be polled (i.e. they're
     /// freshly spawned or they've been woken). Other tasks are not polled.
     ///
-    /// You must call `poll` after receiving a call to `signal_fn`. It is OK
-    /// to call `poll` even when not requested by `signal_fn`, but it wastes
+    /// You must call `poll` after receiving a call to the [`Pender`]. It is OK
+    /// to call `poll` even when not requested by the `Pender`, but it wastes
     /// energy.
     ///
     /// # Safety
     ///
     /// You must NOT call `poll` reentrantly on the same executor.
     ///
-    /// In particular, note that `poll` may call `signal_fn` synchronously. Therefore, you
-    /// must NOT directly call `poll()` from your `signal_fn`. Instead, `signal_fn` has to
+    /// In particular, note that `poll` may call the `Pender` synchronously. Therefore, you
+    /// must NOT directly call `poll()` from the `Pender` callback. Instead, the callback has to
     /// somehow schedule for `poll()` to be called later, at a time you know for sure there's
     /// no `poll()` already running.
     pub unsafe fn poll(&'static self) {
-- 
cgit 


From 8290236ed64435453a9c028c95246a86371bd4ce Mon Sep 17 00:00:00 2001
From: Grant Miller <GrantM11235@gmail.com>
Date: Sun, 2 Apr 2023 14:11:31 -0500
Subject: executor: Replace unsound critical sections with atomics

---
 embassy-executor/src/raw/mod.rs       | 28 +++++++++++++---------------
 embassy-executor/src/raw/run_queue.rs | 18 ++++++++++++------
 2 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'embassy-executor/src/raw')

diff --git a/embassy-executor/src/raw/mod.rs b/embassy-executor/src/raw/mod.rs
index f6c66da5a..bd0cff26b 100644
--- a/embassy-executor/src/raw/mod.rs
+++ b/embassy-executor/src/raw/mod.rs
@@ -22,7 +22,6 @@ use core::ptr::NonNull;
 use core::task::{Context, Poll};
 
 use atomic_polyfill::{AtomicU32, Ordering};
-use critical_section::CriticalSection;
 #[cfg(feature = "integrated-timers")]
 use embassy_time::driver::{self, AlarmHandle};
 #[cfg(feature = "integrated-timers")]
@@ -373,11 +372,11 @@ impl SyncExecutor {
     /// - `task` must be set up to run in this executor.
     /// - `task` must NOT be already enqueued (in this executor or another one).
     #[inline(always)]
-    unsafe fn enqueue(&self, cs: CriticalSection, task: TaskRef) {
+    unsafe fn enqueue(&self, task: TaskRef) {
         #[cfg(feature = "rtos-trace")]
         trace::task_ready_begin(task.as_ptr() as u32);
 
-        if self.run_queue.enqueue(cs, task) {
+        if self.run_queue.enqueue(task) {
             self.pender.pend();
         }
     }
@@ -394,9 +393,7 @@ impl SyncExecutor {
         #[cfg(feature = "rtos-trace")]
         trace::task_new(task.as_ptr() as u32);
 
-        critical_section::with(|cs| {
-            self.enqueue(cs, task);
-        })
+        self.enqueue(task);
     }
 
     /// # Safety
@@ -552,24 +549,25 @@ impl Executor {
 ///
 /// You can obtain a `TaskRef` from a `Waker` using [`task_from_waker`].
 pub fn wake_task(task: TaskRef) {
-    critical_section::with(|cs| {
-        let header = task.header();
-        let state = header.state.load(Ordering::Relaxed);
+    let header = task.header();
 
+    let res = header.state.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |state| {
         // If already scheduled, or if not started,
         if (state & STATE_RUN_QUEUED != 0) || (state & STATE_SPAWNED == 0) {
-            return;
+            None
+        } else {
+            // Mark it as scheduled
+            Some(state | STATE_RUN_QUEUED)
         }
+    });
 
-        // Mark it as scheduled
-        header.state.store(state | STATE_RUN_QUEUED, Ordering::Relaxed);
-
+    if res.is_ok() {
         // We have just marked the task as scheduled, so enqueue it.
         unsafe {
             let executor = header.executor.get().unwrap_unchecked();
-            executor.enqueue(cs, task);
+            executor.enqueue(task);
         }
-    })
+    }
 }
 
 #[cfg(feature = "integrated-timers")]
diff --git a/embassy-executor/src/raw/run_queue.rs b/embassy-executor/src/raw/run_queue.rs
index 362157535..a88174a0c 100644
--- a/embassy-executor/src/raw/run_queue.rs
+++ b/embassy-executor/src/raw/run_queue.rs
@@ -2,7 +2,6 @@ use core::ptr;
 use core::ptr::NonNull;
 
 use atomic_polyfill::{AtomicPtr, Ordering};
-use critical_section::CriticalSection;
 
 use super::{TaskHeader, TaskRef};
 
@@ -46,11 +45,18 @@ impl RunQueue {
     ///
     /// `item` must NOT be already enqueued in any queue.
     #[inline(always)]
-    pub(crate) unsafe fn enqueue(&self, _cs: CriticalSection, task: TaskRef) -> bool {
-        let prev = self.head.load(Ordering::Relaxed);
-        task.header().run_queue_item.next.store(prev, Ordering::Relaxed);
-        self.head.store(task.as_ptr() as _, Ordering::Relaxed);
-        prev.is_null()
+    pub(crate) unsafe fn enqueue(&self, task: TaskRef) -> bool {
+        let mut was_empty = false;
+
+        self.head
+            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |prev| {
+                was_empty = prev.is_null();
+                task.header().run_queue_item.next.store(prev, Ordering::Relaxed);
+                Some(task.as_ptr() as *mut _)
+            })
+            .ok();
+
+        was_empty
     }
 
     /// Empty the queue, then call `on_task` for each task that was in the queue.
-- 
cgit