aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreZio Pan <[email protected]>2024-03-19 20:09:36 +0800
committereZio Pan <[email protected]>2024-03-23 09:15:25 +0800
commit10a9cce855fbf383a8f0ea5511526777062a03c4 (patch)
tree3b207fad61c8c93e6b94f93adb7a0fdfef7d40bd
parent2fa04d93ed93bed97c7575019aea32c2543e322c (diff)
stm32 CORDIC: DMA for q1.31 and q1.15
-rw-r--r--embassy-stm32/src/cordic/mod.rs264
-rw-r--r--embassy-stm32/src/cordic/utils.rs13
2 files changed, 209 insertions, 68 deletions
diff --git a/embassy-stm32/src/cordic/mod.rs b/embassy-stm32/src/cordic/mod.rs
index 9875d73bb..a4b98a770 100644
--- a/embassy-stm32/src/cordic/mod.rs
+++ b/embassy-stm32/src/cordic/mod.rs
@@ -1,5 +1,6 @@
1//! CORDIC co-processor 1//! CORDIC co-processor
2 2
3use embassy_hal_internal::drop::OnDrop;
3use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef}; 4use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef};
4 5
5use crate::{dma, peripherals}; 6use crate::{dma, peripherals};
@@ -100,9 +101,6 @@ impl<'d, T: Instance> Cordic<'d, T> {
100 warn!("At least 1 result hasn't been read, reconfigure will cause DATA LOST"); 101 warn!("At least 1 result hasn't been read, reconfigure will cause DATA LOST");
101 }; 102 };
102 103
103 self.peri.disable_write_dma();
104 self.peri.disable_read_dma();
105
106 // clean RRDY flag 104 // clean RRDY flag
107 while self.peri.ready_to_read() { 105 while self.peri.ready_to_read() {
108 self.peri.read_result(); 106 self.peri.read_result();
@@ -115,22 +113,6 @@ impl<'d, T: Instance> Cordic<'d, T> {
115 // we don't set NRES in here, but to make sure NRES is set each time user call "calc"-ish functions, 113 // we don't set NRES in here, but to make sure NRES is set each time user call "calc"-ish functions,
116 // since each "calc"-ish functions can have different ARGSIZE and RESSIZE, thus NRES should be change accrodingly. 114 // since each "calc"-ish functions can have different ARGSIZE and RESSIZE, thus NRES should be change accrodingly.
117 } 115 }
118
119 fn blocking_read_f32(&mut self) -> (f32, Option<f32>) {
120 let reg_value = self.peri.read_result();
121
122 let res1 = utils::q1_15_to_f32((reg_value & ((1u32 << 16) - 1)) as u16);
123
124 // We don't care about whether the function return 1 or 2 results,
125 // the only thing matter is whether user want 1 or 2 results.
126 let res2 = if !self.config.first_result {
127 Some(utils::q1_15_to_f32((reg_value >> 16) as u16))
128 } else {
129 None
130 };
131
132 (res1, res2)
133 }
134} 116}
135 117
136impl<'d, T: Instance> Drop for Cordic<'d, T> { 118impl<'d, T: Instance> Drop for Cordic<'d, T> {
@@ -141,7 +123,7 @@ impl<'d, T: Instance> Drop for Cordic<'d, T> {
141 123
142// q1.31 related 124// q1.31 related
143impl<'d, T: Instance> Cordic<'d, T> { 125impl<'d, T: Instance> Cordic<'d, T> {
144 /// Run a blocking CORDIC calculation 126 /// Run a blocking CORDIC calculation in q1.31 format
145 pub fn blocking_calc_32bit(&mut self, arg1s: &[f64], arg2s: Option<&[f64]>, output: &mut [f64]) -> usize { 127 pub fn blocking_calc_32bit(&mut self, arg1s: &[f64], arg2s: Option<&[f64]>, output: &mut [f64]) -> usize {
146 if arg1s.is_empty() { 128 if arg1s.is_empty() {
147 return 0; 129 return 0;
@@ -157,9 +139,6 @@ impl<'d, T: Instance> Cordic<'d, T> {
157 139
158 self.check_input_f64(arg1s, arg2s); 140 self.check_input_f64(arg1s, arg2s);
159 141
160 self.peri.disable_write_dma();
161 self.peri.disable_read_dma();
162
163 self.peri.set_result_count(if self.config.first_result { 142 self.peri.set_result_count(if self.config.first_result {
164 Count::One 143 Count::One
165 } else { 144 } else {
@@ -172,7 +151,10 @@ impl<'d, T: Instance> Cordic<'d, T> {
172 151
173 let mut consumed_input_len = 0; 152 let mut consumed_input_len = 0;
174 153
175 // put double input into cordic 154 //
155 // handle 2 input args calculation
156 //
157
176 if arg2s.is_some() && !arg2s.expect("It's infailable").is_empty() { 158 if arg2s.is_some() && !arg2s.expect("It's infailable").is_empty() {
177 let arg2s = arg2s.expect("It's infailable"); 159 let arg2s = arg2s.expect("It's infailable");
178 160
@@ -202,7 +184,10 @@ impl<'d, T: Instance> Cordic<'d, T> {
202 self.blocking_read_f64_to_buf(output, &mut output_count); 184 self.blocking_read_f64_to_buf(output, &mut output_count);
203 } 185 }
204 186
205 // put single input into cordic 187 //
188 // handle 1 input arg calculation
189 //
190
206 let input_left = &arg1s[consumed_input_len..]; 191 let input_left = &arg1s[consumed_input_len..];
207 192
208 if !input_left.is_empty() { 193 if !input_left.is_empty() {
@@ -225,27 +210,14 @@ impl<'d, T: Instance> Cordic<'d, T> {
225 output_count 210 output_count
226 } 211 }
227 212
228 fn blocking_read_f64(&mut self) -> (f64, Option<f64>) {
229 let res1 = utils::q1_31_to_f64(self.peri.read_result());
230
231 // We don't care about whether the function return 1 or 2 results,
232 // the only thing matter is whether user want 1 or 2 results.
233 let res2 = if !self.config.first_result {
234 Some(utils::q1_31_to_f64(self.peri.read_result()))
235 } else {
236 None
237 };
238
239 (res1, res2)
240 }
241
242 fn blocking_read_f64_to_buf(&mut self, result_buf: &mut [f64], result_index: &mut usize) { 213 fn blocking_read_f64_to_buf(&mut self, result_buf: &mut [f64], result_index: &mut usize) {
243 let (res1, res2) = self.blocking_read_f64(); 214 result_buf[*result_index] = utils::q1_31_to_f64(self.peri.read_result());
244 result_buf[*result_index] = res1;
245 *result_index += 1; 215 *result_index += 1;
246 216
247 if let Some(res2) = res2 { 217 // We don't care about whether the function return 1 or 2 results,
248 result_buf[*result_index] = res2; 218 // the only thing matter is whether user want 1 or 2 results.
219 if !self.config.first_result {
220 result_buf[*result_index] = utils::q1_31_to_f64(self.peri.read_result());
249 *result_index += 1; 221 *result_index += 1;
250 } 222 }
251 } 223 }
@@ -254,7 +226,7 @@ impl<'d, T: Instance> Cordic<'d, T> {
254 self.peri.write_argument(utils::f64_to_q1_31(arg)); 226 self.peri.write_argument(utils::f64_to_q1_31(arg));
255 } 227 }
256 228
257 /// Run a async CORDIC calculation 229 /// Run a async CORDIC calculation in q1.31 format
258 pub async fn async_calc_32bit( 230 pub async fn async_calc_32bit(
259 &mut self, 231 &mut self,
260 write_dma: impl Peripheral<P = impl WriteDma<T>>, 232 write_dma: impl Peripheral<P = impl WriteDma<T>>,
@@ -292,8 +264,9 @@ impl<'d, T: Instance> Cordic<'d, T> {
292 let mut input_buf = [0u32; INPUT_BUF_MAX_LEN]; 264 let mut input_buf = [0u32; INPUT_BUF_MAX_LEN];
293 let mut input_buf_len = 0; 265 let mut input_buf_len = 0;
294 266
295 self.peri.enable_write_dma(); 267 //
296 self.peri.enable_read_dma(); 268 // handle 2 input args calculation
269 //
297 270
298 if !arg2s.unwrap_or_default().is_empty() { 271 if !arg2s.unwrap_or_default().is_empty() {
299 let arg2s = arg2s.expect("It's infailable"); 272 let arg2s = arg2s.expect("It's infailable");
@@ -311,7 +284,7 @@ impl<'d, T: Instance> Cordic<'d, T> {
311 } 284 }
312 285
313 if input_buf_len == INPUT_BUF_MAX_LEN { 286 if input_buf_len == INPUT_BUF_MAX_LEN {
314 self.dma_calc_32bit( 287 self.inner_dma_calc_32bit(
315 &mut write_dma, 288 &mut write_dma,
316 &mut read_dma, 289 &mut read_dma,
317 true, 290 true,
@@ -325,12 +298,8 @@ impl<'d, T: Instance> Cordic<'d, T> {
325 } 298 }
326 } 299 }
327 300
328 if input_buf_len % 2 != 0 {
329 panic!("input buf len should be multiple of 2 in double mode")
330 }
331
332 if input_buf_len > 0 { 301 if input_buf_len > 0 {
333 self.dma_calc_32bit( 302 self.inner_dma_calc_32bit(
334 &mut write_dma, 303 &mut write_dma,
335 &mut read_dma, 304 &mut read_dma,
336 true, 305 true,
@@ -344,7 +313,9 @@ impl<'d, T: Instance> Cordic<'d, T> {
344 } 313 }
345 } 314 }
346 315
347 // single input 316 //
317 // handle 1 input arg calculation
318 //
348 319
349 if arg1s.len() > consumed_input_len { 320 if arg1s.len() > consumed_input_len {
350 let input_remain = &arg1s[consumed_input_len..]; 321 let input_remain = &arg1s[consumed_input_len..];
@@ -356,7 +327,7 @@ impl<'d, T: Instance> Cordic<'d, T> {
356 input_buf_len += 1; 327 input_buf_len += 1;
357 328
358 if input_buf_len == INPUT_BUF_MAX_LEN { 329 if input_buf_len == INPUT_BUF_MAX_LEN {
359 self.dma_calc_32bit( 330 self.inner_dma_calc_32bit(
360 &mut write_dma, 331 &mut write_dma,
361 &mut read_dma, 332 &mut read_dma,
362 false, 333 false,
@@ -371,7 +342,7 @@ impl<'d, T: Instance> Cordic<'d, T> {
371 } 342 }
372 343
373 if input_buf_len > 0 { 344 if input_buf_len > 0 {
374 self.dma_calc_32bit( 345 self.inner_dma_calc_32bit(
375 &mut write_dma, 346 &mut write_dma,
376 &mut read_dma, 347 &mut read_dma,
377 false, 348 false,
@@ -388,32 +359,47 @@ impl<'d, T: Instance> Cordic<'d, T> {
388 output_count 359 output_count
389 } 360 }
390 361
391 async fn dma_calc_32bit( 362 // this function is highly coupled with async_calc_32bit, and is not intended to use in other place
363 async fn inner_dma_calc_32bit(
392 &mut self, 364 &mut self,
393 write_dma: impl Peripheral<P = impl WriteDma<T>>, 365 write_dma: impl Peripheral<P = impl WriteDma<T>>,
394 read_dma: impl Peripheral<P = impl ReadDma<T>>, 366 read_dma: impl Peripheral<P = impl ReadDma<T>>,
395 double_input: bool, 367 double_input: bool, // gather extra info to calc output_buf size
396 input_buf: &[u32], 368 input_buf: &[u32], // input_buf, its content should be exact values and length for calculation
397 output: &mut [f64], 369 output: &mut [f64], // caller should use this as the final output array
398 output_start_index: &mut usize, 370 output_start_index: &mut usize, // the index of start point of the output for this round of calculation
399 ) { 371 ) {
400 into_ref!(write_dma, read_dma); 372 into_ref!(write_dma, read_dma);
401 373
402 let write_req = write_dma.request(); 374 let write_req = write_dma.request();
403 let read_req = read_dma.request(); 375 let read_req = read_dma.request();
404 376
405 let mut output_buf = [0u32; INPUT_BUF_MAX_LEN * 2]; // make output_buf long enough 377 // output_buf is the place to store raw value from CORDIC (via DMA).
378 // For buf size, we assume in this round of calculation:
379 // all input is 1 arg, and all calculation need 2 output,
380 // thus output_buf will always be long enough.
381 let mut output_buf = [0u32; INPUT_BUF_MAX_LEN * 2];
406 382
407 let mut output_buf_size = input_buf.len(); 383 let mut output_buf_size = input_buf.len();
408 if !self.config.first_result { 384 if !self.config.first_result {
385 // if we need 2 result for 1 input, then output_buf length should be 2x long.
409 output_buf_size *= 2; 386 output_buf_size *= 2;
410 }; 387 };
411 if double_input { 388 if double_input {
389 // if input itself is 2 args for 1 calculation, then output_buf length should be /2.
412 output_buf_size /= 2; 390 output_buf_size /= 2;
413 } 391 }
414 392
415 let active_output_buf = &mut output_buf[..output_buf_size]; 393 let active_output_buf = &mut output_buf[..output_buf_size];
416 394
395 self.peri.enable_write_dma();
396 self.peri.enable_read_dma();
397
398 let on_drop = OnDrop::new(|| {
399 self.peri.disable_write_dma();
400 self.peri.disable_read_dma();
401 });
402
417 unsafe { 403 unsafe {
418 let write_transfer = dma::Transfer::new_write( 404 let write_transfer = dma::Transfer::new_write(
419 &mut write_dma, 405 &mut write_dma,
@@ -434,6 +420,8 @@ impl<'d, T: Instance> Cordic<'d, T> {
434 embassy_futures::join::join(write_transfer, read_transfer).await; 420 embassy_futures::join::join(write_transfer, read_transfer).await;
435 } 421 }
436 422
423 drop(on_drop);
424
437 for &mut output_u32 in active_output_buf { 425 for &mut output_u32 in active_output_buf {
438 output[*output_start_index] = utils::q1_31_to_f64(output_u32); 426 output[*output_start_index] = utils::q1_31_to_f64(output_u32);
439 *output_start_index += 1; 427 *output_start_index += 1;
@@ -443,7 +431,7 @@ impl<'d, T: Instance> Cordic<'d, T> {
443 431
444// q1.15 related 432// q1.15 related
445impl<'d, T: Instance> Cordic<'d, T> { 433impl<'d, T: Instance> Cordic<'d, T> {
446 /// Run a CORDIC calculation 434 /// Run a blocking CORDIC calculation in q1.15 format
447 pub fn blocking_calc_16bit(&mut self, arg1s: &[f32], arg2s: Option<&[f32]>, output: &mut [f32]) -> usize { 435 pub fn blocking_calc_16bit(&mut self, arg1s: &[f32], arg2s: Option<&[f32]>, output: &mut [f32]) -> usize {
448 if arg1s.is_empty() { 436 if arg1s.is_empty() {
449 return 0; 437 return 0;
@@ -459,9 +447,6 @@ impl<'d, T: Instance> Cordic<'d, T> {
459 447
460 self.check_input_f32(arg1s, arg2s); 448 self.check_input_f32(arg1s, arg2s);
461 449
462 self.peri.disable_write_dma();
463 self.peri.disable_read_dma();
464
465 // In q1.15 mode, 1 write/read to access 2 arguments/results 450 // In q1.15 mode, 1 write/read to access 2 arguments/results
466 self.peri.set_argument_count(Count::One); 451 self.peri.set_argument_count(Count::One);
467 self.peri.set_result_count(Count::One); 452 self.peri.set_result_count(Count::One);
@@ -506,20 +491,163 @@ impl<'d, T: Instance> Cordic<'d, T> {
506 } 491 }
507 492
508 fn blocking_write_f32(&mut self, arg1: f32, arg2: f32) { 493 fn blocking_write_f32(&mut self, arg1: f32, arg2: f32) {
509 let reg_value: u32 = utils::f32_to_q1_15(arg1) as u32 + ((utils::f32_to_q1_15(arg2) as u32) << 16); 494 let reg_value: u32 = utils::f32_args_to_u32(arg1, arg2);
510 self.peri.write_argument(reg_value); 495 self.peri.write_argument(reg_value);
511 } 496 }
512 497
513 fn blocking_read_f32_to_buf(&mut self, result_buf: &mut [f32], result_index: &mut usize) { 498 fn blocking_read_f32_to_buf(&mut self, result_buf: &mut [f32], result_index: &mut usize) {
514 let (res1, res2) = self.blocking_read_f32(); 499 let reg_value = self.peri.read_result();
500
501 let (res1, res2) = utils::u32_to_f32_res(reg_value);
502
515 result_buf[*result_index] = res1; 503 result_buf[*result_index] = res1;
516 *result_index += 1; 504 *result_index += 1;
517 505
518 if let Some(res2) = res2 { 506 // We don't care about whether the function return 1 or 2 results,
507 // the only thing matter is whether user want 1 or 2 results.
508 if !self.config.first_result {
519 result_buf[*result_index] = res2; 509 result_buf[*result_index] = res2;
520 *result_index += 1; 510 *result_index += 1;
521 } 511 }
522 } 512 }
513
514 /// Run a async CORDIC calculation in q1.15 format
515 pub async fn async_calc_16bit(
516 &mut self,
517 write_dma: impl Peripheral<P = impl WriteDma<T>>,
518 read_dma: impl Peripheral<P = impl ReadDma<T>>,
519 arg1s: &[f32],
520 arg2s: Option<&[f32]>,
521 output: &mut [f32],
522 ) -> usize {
523 if arg1s.is_empty() {
524 return 0;
525 }
526
527 assert!(
528 match self.config.first_result {
529 true => output.len() >= arg1s.len(),
530 false => output.len() >= 2 * arg1s.len(),
531 },
532 "Output buf length is not long enough"
533 );
534
535 self.check_input_f32(arg1s, arg2s);
536
537 into_ref!(write_dma, read_dma);
538
539 // In q1.15 mode, 1 write/read to access 2 arguments/results
540 self.peri.set_argument_count(Count::One);
541 self.peri.set_result_count(Count::One);
542
543 self.peri.set_data_width(Width::Bits16, Width::Bits16);
544
545 let mut output_count = 0;
546 let mut input_buf = [0u32; INPUT_BUF_MAX_LEN];
547 let mut input_buf_len = 0;
548
549 // In q1.15 mode, we always fill 1 pair of 16bit value into WDATA register.
550 // If arg2s is None or empty array, we assume arg2 value always 1.0 (as reset value for ARG2).
551 // If arg2s has some values, but is not as long as arg1s,
552 // we fill the rest of the arg2 values with the last value from arg2s (as the q1.31 version does)
553
554 let arg2_default_value = match arg2s {
555 Some(arg2s) if !arg2s.is_empty() => arg2s[arg2s.len() - 1],
556 _ => 1.0,
557 };
558
559 let args = arg1s.iter().zip(
560 arg2s
561 .unwrap_or(&[])
562 .iter()
563 .chain(core::iter::repeat(&arg2_default_value)),
564 );
565
566 for (&arg1, &arg2) in args {
567 input_buf[input_buf_len] = utils::f32_args_to_u32(arg1, arg2);
568 input_buf_len += 1;
569
570 if input_buf_len == INPUT_BUF_MAX_LEN {
571 self.inner_dma_calc_16bit(&mut write_dma, &mut read_dma, &input_buf, output, &mut output_count)
572 .await;
573 }
574 }
575
576 if input_buf_len > 0 {
577 self.inner_dma_calc_16bit(
578 &mut write_dma,
579 &mut read_dma,
580 &input_buf[..input_buf_len],
581 output,
582 &mut output_count,
583 )
584 .await;
585 }
586
587 output_count
588 }
589
590 // this function is highly coupled with async_calc_16bit, and is not intended to use in other place
591 async fn inner_dma_calc_16bit(
592 &mut self,
593 write_dma: impl Peripheral<P = impl WriteDma<T>>,
594 read_dma: impl Peripheral<P = impl ReadDma<T>>,
595 input_buf: &[u32], // input_buf, its content should be exact values and length for calculation
596 output: &mut [f32], // caller should use this as the final output array
597 output_start_index: &mut usize, // the index of start point of the output for this round of calculation
598 ) {
599 into_ref!(write_dma, read_dma);
600
601 let write_req = write_dma.request();
602 let read_req = read_dma.request();
603
604 // output_buf is the place to store raw value from CORDIC (via DMA).
605 let mut output_buf = [0u32; INPUT_BUF_MAX_LEN];
606
607 let active_output_buf = &mut output_buf[..input_buf.len()];
608
609 self.peri.enable_write_dma();
610 self.peri.enable_read_dma();
611
612 let on_drop = OnDrop::new(|| {
613 self.peri.disable_write_dma();
614 self.peri.disable_read_dma();
615 });
616
617 unsafe {
618 let write_transfer = dma::Transfer::new_write(
619 &mut write_dma,
620 write_req,
621 input_buf,
622 T::regs().wdata().as_ptr() as *mut _,
623 Default::default(),
624 );
625
626 let read_transfer = dma::Transfer::new_read(
627 &mut read_dma,
628 read_req,
629 T::regs().rdata().as_ptr() as *mut _,
630 active_output_buf,
631 Default::default(),
632 );
633
634 embassy_futures::join::join(write_transfer, read_transfer).await;
635 }
636
637 drop(on_drop);
638
639 for &mut output_u32 in active_output_buf {
640 let (res1, res2) = utils::u32_to_f32_res(output_u32);
641
642 output[*output_start_index] = res1;
643 *output_start_index += 1;
644
645 if !self.config.first_result {
646 output[*output_start_index] = res2;
647 *output_start_index += 1;
648 }
649 }
650 }
523} 651}
524 652
525// check input value ARG1, ARG2, SCALE and FUNCTION are compatible with each other 653// check input value ARG1, ARG2, SCALE and FUNCTION are compatible with each other
diff --git a/embassy-stm32/src/cordic/utils.rs b/embassy-stm32/src/cordic/utils.rs
index 2f4b5c5e8..79bef6b97 100644
--- a/embassy-stm32/src/cordic/utils.rs
+++ b/embassy-stm32/src/cordic/utils.rs
@@ -57,3 +57,16 @@ floating_fixed_convert!(
57 15, 57 15,
58 0x3800_0000u32 // binary form of 2f32^(-15) 58 0x3800_0000u32 // binary form of 2f32^(-15)
59); 59);
60
61#[inline(always)]
62pub(crate) fn f32_args_to_u32(arg1: f32, arg2: f32) -> u32 {
63 f32_to_q1_15(arg1) as u32 + ((f32_to_q1_15(arg2) as u32) << 16)
64}
65
66#[inline(always)]
67pub(crate) fn u32_to_f32_res(reg_value: u32) -> (f32, f32) {
68 let res1 = q1_15_to_f32((reg_value & ((1u32 << 16) - 1)) as u16);
69 let res2 = q1_15_to_f32((reg_value >> 16) as u16);
70
71 (res1, res2)
72}