aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpennae <[email protected]>2023-04-16 19:39:32 +0200
committerpennae <[email protected]>2023-04-16 19:45:18 +0200
commit7a682ec02af50026d31296ce5cac6383580f5e55 (patch)
tree9ebb9c3e648fe1aca7ffc2e7dd6730bc656fe66d
parent1fdce6e52a51de89f48f002d5c92139f58029575 (diff)
rp: add division intrinsics
rp2040-hal adds division intrinsics using the hardware divider unit in the SIO, as does the pico-sdk itself. Using the hardware is faster than the compiler_rt implementations, and more compact too.
-rw-r--r--embassy-rp/src/intrinsics.rs198
1 files changed, 198 insertions, 0 deletions
diff --git a/embassy-rp/src/intrinsics.rs b/embassy-rp/src/intrinsics.rs
index 3e75fb7fc..3b63846d4 100644
--- a/embassy-rp/src/intrinsics.rs
+++ b/embassy-rp/src/intrinsics.rs
@@ -274,3 +274,201 @@ macro_rules! intrinsics {
274 intrinsics!($($rest)*); 274 intrinsics!($($rest)*);
275 }; 275 };
276} 276}
277
278// Credit: taken from `rp-hal` (also licensed Apache+MIT)
279// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/sio.rs
280
// This takes advantage of how AAPCS defines a 64-bit return on 32-bit registers:
// the value is packed into r0 (bits 0..=31) and r1 (bits 32..=63). So all we need
// to do is put the remainder in the high-order 32 bits of a 64-bit result. We can
// also alias the plain division operators to these routines for a similar reason:
// r0 is the result either way and r1 is a scratch register, so the caller can't
// assume it retains the argument value.
// Assembler macros shared by every hardware-divider entry point in this file.
//
// * `hwdivider_head` loads the SIO base address into r2 and tests the divider's
//   DIRTY flag. If the divider is clean, execution falls through to label `1:`,
//   where the including routine writes its operands. If it is dirty, control
//   jumps to the save/restore path at label `2:` inside `hwdivider_tail`.
// * `hwdivider_tail` waits for the result, returns it in r0 (quotient) and
//   r1 (remainder), and also contains the out-of-line save/restore path.
//
// The two macros are only meaningful when used together, with the operand
// writes placed between them. Braces are doubled (`{{`/`}}`) because
// `global_asm!` treats its template strings as format strings.
#[cfg(target_arch = "arm")]
core::arch::global_asm!(
    ".macro hwdivider_head",
    "ldr r2, =(0xd0000000)", // SIO_BASE
    // Check the DIRTY state of the divider by shifting it into the C
    // status bit.
    "ldr r3, [r2, #0x078]", // DIV_CSR
    "lsrs r3, #2", // DIRTY is bit 1, so shifting right by 2 moves it into C
    // We only need to save the state when DIRTY, otherwise we can just do the
    // division directly.
    "bcs 2f",
    "1:",
    // Do the actual division now, we're either not DIRTY, or we've saved the
    // state and branched back here so it's safe now.
    ".endm",
    ".macro hwdivider_tail",
    // 8 cycle delay to wait for the result. Each branch takes two cycles
    // and fits into a 2-byte Thumb instruction, so this is smaller than
    // 8 NOPs.
    "b 3f",
    "3: b 3f",
    "3: b 3f",
    "3: b 3f",
    "3:",
    // Read the quotient last, since that's what clears the dirty flag.
    "ldr r1, [r2, #0x074]", // DIV_REMAINDER
    "ldr r0, [r2, #0x070]", // DIV_QUOTIENT
    // Either return to the caller or back to the state restore (when we were
    // entered through the `bl 1b` below, lr points just after that `bl`).
    "bx lr",
    "2:",
    // Save path: the divider held somebody else's state when we were called.
    //
    // Since we can't save the signed-ness of the calculation, we have to make
    // sure that there's at least an 8 cycle delay before we read the result.
    // The push takes 5 cycles, and we've already spent at least 7 checking
    // the DIRTY state to get here.
    "push {{r4-r6, lr}}",
    // Read the quotient last, since that's what clears the dirty flag.
    "ldr r3, [r2, #0x060]", // DIV_UDIVIDEND
    "ldr r4, [r2, #0x064]", // DIV_UDIVISOR
    "ldr r5, [r2, #0x074]", // DIV_REMAINDER
    "ldr r6, [r2, #0x070]", // DIV_QUOTIENT
    // If we get interrupted here (before a write sets the DIRTY flag) it's
    // fine, since we have the full state, so the interruptor doesn't have to
    // restore it. Once the write happens and the DIRTY flag is set, the
    // interruptor becomes responsible for restoring our state.
    "bl 1b",
    // Restore path: r0/r1 now hold our own result; put the saved state back.
    //
    // If we are interrupted here, then the interruptor will start an incorrect
    // calculation using a wrong divisor, but we'll restore the divisor and
    // result ourselves correctly. This sets DIRTY, so any interruptor will
    // save the state.
    "str r3, [r2, #0x060]", // DIV_UDIVIDEND
    // If we are interrupted here, the interruptor may start the
    // calculation using incorrectly signed inputs, but we'll restore the
    // result ourselves. This sets DIRTY, so any interruptor will save the
    // state.
    "str r4, [r2, #0x064]", // DIV_UDIVISOR
    // If we are interrupted here, the interruptor will have restored
    // everything but the quotient may be wrongly signed. If the calculation
    // started by the above writes is still ongoing it is stopped, so it won't
    // replace the result we're restoring. DIRTY and READY set, but only
    // DIRTY matters to make the interruptor save the state.
    "str r5, [r2, #0x074]", // DIV_REMAINDER
    // State fully restored after the quotient write. This sets both DIRTY
    // and READY, so whatever we may have interrupted can read the result.
    "str r6, [r2, #0x070]", // DIV_QUOTIENT
    "pop {{r4-r6, pc}}",
    ".endm",
);
354
// Generates one hardware-divider routine plus a Rust-callable binding for it.
//
// Invocation grammar:
//
//     division_function! {
//         name alias_a alias_b ... ( ArgTy ) {
//             "asm line", "asm line", ...
//         }
//     }
//
// * `name`   - Rust identifier of the generated function; the assembly symbol
//              is `_rphal_<name>`.
// * aliases  - zero or more extra global symbols placed on the same code; they
//              are only emitted when the `intrinsics` feature is enabled.
// * `ArgTy`  - type of both operands (dividend `n`, divisor `d`).
// * asm lines - one or more string literals spliced between `hwdivider_head`
//              and `hwdivider_tail`; the visible callers use them to write the
//              operands to the divider registers relative to r2.
//
// The generated function returns a `u64` with the quotient in the low 32 bits
// and the remainder in the high 32 bits. On non-ARM targets a stub that always
// returns 0 is generated instead, so the crate still type-checks there.
macro_rules! division_function {
    (
        $name:ident $($intrinsic:ident)* ( $argty:ty ) {
            $($begin:literal),+
        }
    ) => {
        // ARM with `intrinsics`: emit the routine and attach every alias
        // symbol to the same entry point.
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        core::arch::global_asm!(
            // Mangle the name slightly, since this is a global symbol.
            concat!(".global _rphal_", stringify!($name)),
            concat!(".type _rphal_", stringify!($name), ", %function"),
            ".align 2",
            concat!("_rphal_", stringify!($name), ":"),
            $(
                concat!(".global ", stringify!($intrinsic)),
                concat!(".type ", stringify!($intrinsic), ", %function"),
                concat!(stringify!($intrinsic), ":"),
            )*

            "hwdivider_head",
            $($begin),+ ,
            "hwdivider_tail",
        );

        // ARM without `intrinsics`: same routine, but only under the mangled
        // name, so no alias symbols are defined.
        #[cfg(all(target_arch = "arm", not(feature = "intrinsics")))]
        core::arch::global_asm!(
            // Mangle the name slightly, since this is a global symbol.
            concat!(".global _rphal_", stringify!($name)),
            concat!(".type _rphal_", stringify!($name), ", %function"),
            ".align 2",
            concat!("_rphal_", stringify!($name), ":"),

            "hwdivider_head",
            $($begin),+ ,
            "hwdivider_tail",
        );

        #[cfg(target_arch = "arm")]
        extern "aapcs" {
            // Connect a local name to global symbol above through FFI.
            #[link_name = concat!("_rphal_", stringify!($name)) ]
            fn $name(n: $argty, d: $argty) -> u64;
        }

        // Placeholder for non-ARM builds; it is `unsafe` so call sites look
        // the same on every target.
        #[cfg(not(target_arch = "arm"))]
        #[allow(unused_variables)]
        unsafe fn $name(n: $argty, d: $argty) -> u64 { 0 }
    };
}
404
405division_function! {
406 unsigned_divmod __aeabi_uidivmod __aeabi_uidiv ( u32 ) {
407 "str r0, [r2, #0x060]", // DIV_UDIVIDEND
408 "str r1, [r2, #0x064]" // DIV_UDIVISOR
409 }
410}
411
412division_function! {
413 signed_divmod __aeabi_idivmod __aeabi_idiv ( i32 ) {
414 "str r0, [r2, #0x068]", // DIV_SDIVIDEND
415 "str r1, [r2, #0x06c]" // DIV_SDIVISOR
416 }
417}
418
419fn divider_unsigned(n: u32, d: u32) -> DivResult<u32> {
420 let packed = unsafe { unsigned_divmod(n, d) };
421 DivResult {
422 quotient: packed as u32,
423 remainder: (packed >> 32) as u32,
424 }
425}
426
427fn divider_signed(n: i32, d: i32) -> DivResult<i32> {
428 let packed = unsafe { signed_divmod(n, d) };
429 // Double casts to avoid sign extension
430 DivResult {
431 quotient: packed as u32 as i32,
432 remainder: (packed >> 32) as u32 as i32,
433 }
434}
435
/// Result of a combined divide/modulo operation.
///
/// `T` is the integer type of the operands (`u32` or `i32` for the helpers in
/// this file).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DivResult<T> {
    /// The quotient of the divide/modulo operation.
    pub quotient: T,
    /// The remainder of the divide/modulo operation.
    pub remainder: T,
}
443
444intrinsics! {
445 extern "C" fn __udivsi3(n: u32, d: u32) -> u32 {
446 divider_unsigned(n, d).quotient
447 }
448
449 extern "C" fn __umodsi3(n: u32, d: u32) -> u32 {
450 divider_unsigned(n, d).remainder
451 }
452
453 extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
454 let quo_rem = divider_unsigned(n, d);
455 if let Some(rem) = rem {
456 *rem = quo_rem.remainder;
457 }
458 quo_rem.quotient
459 }
460
461 extern "C" fn __divsi3(n: i32, d: i32) -> i32 {
462 divider_signed(n, d).quotient
463 }
464
465 extern "C" fn __modsi3(n: i32, d: i32) -> i32 {
466 divider_signed(n, d).remainder
467 }
468
469 extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 {
470 let quo_rem = divider_signed(n, d);
471 *rem = quo_rem.remainder;
472 quo_rem.quotient
473 }
474}