1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
|
#![macro_use]
// Credit: taken from `rp-hal` (also licensed Apache+MIT)
// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/intrinsics.rs
/// Generate a series of aliases for an intrinsic function.
macro_rules! intrinsics_aliases {
(
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
) => {};
(
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
) => {};
(
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
$alias:ident
$($rest:ident)*
) => {
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
intrinsics! {
extern $abi fn $alias( $($argname: $ty),* ) -> $ret {
$name($($argname),*)
}
}
intrinsics_aliases! {
extern $abi fn $name( $($argname: $ty),* ) -> $ret,
$($rest)*
}
};
(
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
$alias:ident
$($rest:ident)*
) => {
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
intrinsics! {
unsafe extern $abi fn $alias( $($argname: $ty),* ) -> $ret {
$name($($argname),*)
}
}
intrinsics_aliases! {
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret,
$($rest)*
}
};
}
/// The macro used to define overridden intrinsics.
///
/// This is heavily inspired by the macro used by compiler-builtins. The idea
/// is to abstract anything special that needs to be done to override an
/// intrinsic function. Intrinsic generation is disabled for non-ARM targets
/// so things like CI and docs generation do not have problems. Additionally
/// they can be disabled by disabling the crate feature `intrinsics` for
/// testing or comparing performance.
///
/// Like the compiler-builtins macro, it accepts a series of functions that
/// looks like normal Rust code:
///
/// ```rust,ignore
/// intrinsics! {
/// extern "C" fn foo(a: i32) -> u32 {
/// // ...
/// }
/// #[nonstandard_attribute]
/// extern "C" fn bar(a: i32) -> u32 {
/// // ...
/// }
/// }
/// ```
///
/// Each function can also be decorated with nonstandard attributes to control
/// additional behaviour:
///
/// * `slower_than_default` - indicates that the override is slower than the
/// default implementation. Currently this just disables the override
/// entirely.
/// * `bootrom_v2` - indicates that the override is only available
/// on a V2 bootrom or higher. Only enabled when the feature
/// `rom-v2-intrinsics` is set.
/// * `alias` - accepts a list of names to alias the intrinsic to.
/// * `aeabi` - accepts a list of ARM EABI names to alias to.
///
macro_rules! intrinsics {
() => {};
(
#[slower_than_default]
$(#[$($attr:tt)*])*
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
// Not exported, but defined so the actual implementation is
// considered used
#[allow(dead_code)]
fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
intrinsics!($($rest)*);
};
(
#[bootrom_v2]
$(#[$($attr:tt)*])*
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
// Not exported, but defined so the actual implementation is
// considered used
#[cfg(not(feature = "rom-v2-intrinsics"))]
#[allow(dead_code)]
fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
#[cfg(feature = "rom-v2-intrinsics")]
intrinsics! {
$(#[$($attr)*])*
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
}
intrinsics!($($rest)*);
};
(
#[alias = $($alias:ident),*]
$(#[$($attr:tt)*])*
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
intrinsics! {
$(#[$($attr)*])*
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
}
intrinsics_aliases! {
extern $abi fn $name( $($argname: $ty),* ) -> $ret,
$($alias) *
}
intrinsics!($($rest)*);
};
(
#[alias = $($alias:ident),*]
$(#[$($attr:tt)*])*
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
intrinsics! {
$(#[$($attr)*])*
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
}
intrinsics_aliases! {
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret,
$($alias) *
}
intrinsics!($($rest)*);
};
(
#[aeabi = $($alias:ident),*]
$(#[$($attr:tt)*])*
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
intrinsics! {
$(#[$($attr)*])*
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
}
intrinsics_aliases! {
extern "aapcs" fn $name( $($argname: $ty),* ) -> $ret,
$($alias) *
}
intrinsics!($($rest)*);
};
(
$(#[$($attr:tt)*])*
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
$(#[$($attr)*])*
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
mod $name {
#[unsafe(no_mangle)]
$(#[$($attr)*])*
pub extern $abi fn $name( $($argname: $ty),* ) -> $ret {
super::$name($($argname),*)
}
}
// Not exported, but defined so the actual implementation is
// considered used
#[cfg(not(all(target_arch = "arm", feature = "intrinsics")))]
#[allow(dead_code)]
fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
intrinsics!($($rest)*);
};
(
$(#[$($attr:tt)*])*
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
$($body:tt)*
}
$($rest:tt)*
) => {
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
$(#[$($attr)*])*
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
mod $name {
#[unsafe(no_mangle)]
$(#[$($attr)*])*
pub unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
super::$name($($argname),*)
}
}
// Not exported, but defined so the actual implementation is
// considered used
#[cfg(not(all(target_arch = "arm", feature = "intrinsics")))]
#[allow(dead_code)]
unsafe fn $name( $($argname: $ty),* ) -> $ret {
$($body)*
}
intrinsics!($($rest)*);
};
}
// Credit: taken from `rp-hal` (also licensed Apache+MIT)
// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/sio.rs
// This takes advantage of how AAPCS defines a 64-bit return on 32-bit registers
// by packing it into r0[0:31] and r1[32:63]. So all we need to do is put
// the remainder in the high order 32 bits of a 64 bit result. We can also
// alias the division operators to these for a similar reason r0 is the
// result either way and r1 a scratch register, so the caller can't assume it
// retains the argument value.
#[cfg(target_arch = "arm")]
core::arch::global_asm!(
".macro hwdivider_head",
"ldr r2, =(0xd0000000)", // SIO_BASE
// Check the DIRTY state of the divider by shifting it into the C
// status bit.
"ldr r3, [r2, #0x078]", // DIV_CSR
"lsrs r3, #2", // DIRTY = 1, so shift 2 down
// We only need to save the state when DIRTY, otherwise we can just do the
// division directly.
"bcs 2f",
"1:",
// Do the actual division now, we're either not DIRTY, or we've saved the
// state and branched back here so it's safe now.
".endm",
".macro hwdivider_tail",
// 8 cycle delay to wait for the result. Each branch takes two cycles
// and fits into a 2-byte Thumb instruction, so this is smaller than
// 8 NOPs.
"b 3f",
"3: b 3f",
"3: b 3f",
"3: b 3f",
"3:",
// Read the quotient last, since that's what clears the dirty flag.
"ldr r1, [r2, #0x074]", // DIV_REMAINDER
"ldr r0, [r2, #0x070]", // DIV_QUOTIENT
// Either return to the caller or back to the state restore.
"bx lr",
"2:",
// Since we can't save the signed-ness of the calculation, we have to make
// sure that there's at least an 8 cycle delay before we read the result.
// The push takes 5 cycles, and we've already spent at least 7 checking
// the DIRTY state to get here.
"push {{r4-r6, lr}}",
// Read the quotient last, since that's what clears the dirty flag.
"ldr r3, [r2, #0x060]", // DIV_UDIVIDEND
"ldr r4, [r2, #0x064]", // DIV_UDIVISOR
"ldr r5, [r2, #0x074]", // DIV_REMAINDER
"ldr r6, [r2, #0x070]", // DIV_QUOTIENT
// If we get interrupted here (before a write sets the DIRTY flag) it's
// fine, since we have the full state, so the interruptor doesn't have to
// restore it. Once the write happens and the DIRTY flag is set, the
// interruptor becomes responsible for restoring our state.
"bl 1b",
// If we are interrupted here, then the interruptor will start an incorrect
// calculation using a wrong divisor, but we'll restore the divisor and
// result ourselves correctly. This sets DIRTY, so any interruptor will
// save the state.
"str r3, [r2, #0x060]", // DIV_UDIVIDEND
// If we are interrupted here, the interruptor may start the calculation
// using incorrectly signed inputs, but we'll restore the result ourselves.
// This sets DIRTY, so any interruptor will save the state.
"str r4, [r2, #0x064]", // DIV_UDIVISOR
// If we are interrupted here, the interruptor will have restored
// everything but the quotient may be wrongly signed. If the calculation
// started by the above writes is still ongoing it is stopped, so it won't
// replace the result we're restoring. DIRTY and READY set, but only
// DIRTY matters to make the interruptor save the state.
"str r5, [r2, #0x074]", // DIV_REMAINDER
// State fully restored after the quotient write. This sets both DIRTY
// and READY, so whatever we may have interrupted can read the result.
"str r6, [r2, #0x070]", // DIV_QUOTIENT
"pop {{r4-r6, pc}}",
".endm",
);
macro_rules! division_function {
(
$name:ident $($intrinsic:ident)* ( $argty:ty ) {
$($begin:literal),+
}
) => {
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
core::arch::global_asm!(
// Mangle the name slightly, since this is a global symbol.
concat!(".section .text._erphal_", stringify!($name)),
concat!(".global _erphal_", stringify!($name)),
concat!(".type _erphal_", stringify!($name), ", %function"),
".align 2",
concat!("_erphal_", stringify!($name), ":"),
$(
concat!(".global ", stringify!($intrinsic)),
concat!(".type ", stringify!($intrinsic), ", %function"),
concat!(stringify!($intrinsic), ":"),
)*
"hwdivider_head",
$($begin),+ ,
"hwdivider_tail",
);
#[cfg(all(target_arch = "arm", not(feature = "intrinsics")))]
core::arch::global_asm!(
// Mangle the name slightly, since this is a global symbol.
concat!(".section .text._erphal_", stringify!($name)),
concat!(".global _erphal_", stringify!($name)),
concat!(".type _erphal_", stringify!($name), ", %function"),
".align 2",
concat!("_erphal_", stringify!($name), ":"),
"hwdivider_head",
$($begin),+ ,
"hwdivider_tail",
);
#[cfg(target_arch = "arm")]
unsafe extern "aapcs" {
// Connect a local name to global symbol above through FFI.
#[link_name = concat!("_erphal_", stringify!($name)) ]
fn $name(n: $argty, d: $argty) -> u64;
}
#[cfg(not(target_arch = "arm"))]
#[allow(unused_variables)]
unsafe fn $name(n: $argty, d: $argty) -> u64 { 0 }
};
}
division_function! {
unsigned_divmod __aeabi_uidivmod __aeabi_uidiv ( u32 ) {
"str r0, [r2, #0x060]", // DIV_UDIVIDEND
"str r1, [r2, #0x064]" // DIV_UDIVISOR
}
}
division_function! {
signed_divmod __aeabi_idivmod __aeabi_idiv ( i32 ) {
"str r0, [r2, #0x068]", // DIV_SDIVIDEND
"str r1, [r2, #0x06c]" // DIV_SDIVISOR
}
}
fn divider_unsigned(n: u32, d: u32) -> DivResult<u32> {
let packed = unsafe { unsigned_divmod(n, d) };
DivResult {
quotient: packed as u32,
remainder: (packed >> 32) as u32,
}
}
fn divider_signed(n: i32, d: i32) -> DivResult<i32> {
let packed = unsafe { signed_divmod(n, d) };
// Double casts to avoid sign extension
DivResult {
quotient: packed as u32 as i32,
remainder: (packed >> 32) as u32 as i32,
}
}
/// Result of divide/modulo operation
struct DivResult<T> {
/// The quotient of divide/modulo operation
pub quotient: T,
/// The remainder of divide/modulo operation
pub remainder: T,
}
intrinsics! {
extern "C" fn __udivsi3(n: u32, d: u32) -> u32 {
divider_unsigned(n, d).quotient
}
extern "C" fn __umodsi3(n: u32, d: u32) -> u32 {
divider_unsigned(n, d).remainder
}
extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
let quo_rem = divider_unsigned(n, d);
if let Some(rem) = rem {
*rem = quo_rem.remainder;
}
quo_rem.quotient
}
extern "C" fn __divsi3(n: i32, d: i32) -> i32 {
divider_signed(n, d).quotient
}
extern "C" fn __modsi3(n: i32, d: i32) -> i32 {
divider_signed(n, d).remainder
}
extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 {
let quo_rem = divider_signed(n, d);
*rem = quo_rem.remainder;
quo_rem.quotient
}
}
|