aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoreZio Pan <[email protected]>2024-03-22 17:29:10 +0800
committereZio Pan <[email protected]>2024-03-23 09:15:25 +0800
commit0abcccee966af0b12e62fc7fae8499fa03194823 (patch)
treeb0cf28261ee18b1bd111de61d93ef5cbddc5f29e
parent83069e7b49bd181236e6a68005ad6119d39b39c3 (diff)
stm32 CORDIC: re-design API
-rw-r--r--embassy-stm32/src/cordic/errors.rs67
-rw-r--r--embassy-stm32/src/cordic/mod.rs741
-rw-r--r--embassy-stm32/src/cordic/utils.rs15
-rw-r--r--examples/stm32h5/src/bin/cordic.rs59
-rw-r--r--tests/stm32/src/bin/cordic.rs110
5 files changed, 414 insertions, 578 deletions
diff --git a/embassy-stm32/src/cordic/errors.rs b/embassy-stm32/src/cordic/errors.rs
index 653014290..3c70fc9e7 100644
--- a/embassy-stm32/src/cordic/errors.rs
+++ b/embassy-stm32/src/cordic/errors.rs
@@ -5,12 +5,14 @@ use super::{Function, Scale};
5pub enum CordicError { 5pub enum CordicError {
6 /// Config error 6 /// Config error
7 ConfigError(ConfigError), 7 ConfigError(ConfigError),
8 /// Argument error 8 /// Argument length is incorrect
9 ArgError(ArgError), 9 ArgumentLengthIncorrect,
10 /// Output buffer length error 10 /// Result buffer length error
11 OutputLengthNotEnough, 11 ResultLengthNotEnough,
12 /// Input value is out of range for Q1.x format 12 /// Input value is out of range for Q1.x format
13 NumberOutOfRange(NumberOutOfRange), 13 NumberOutOfRange(NumberOutOfRange),
14 /// Argument error
15 ArgError(ArgError),
14} 16}
15 17
16impl From<ConfigError> for CordicError { 18impl From<ConfigError> for CordicError {
@@ -19,18 +21,18 @@ impl From<ConfigError> for CordicError {
19 } 21 }
20} 22}
21 23
22impl From<ArgError> for CordicError {
23 fn from(value: ArgError) -> Self {
24 Self::ArgError(value)
25 }
26}
27
28impl From<NumberOutOfRange> for CordicError { 24impl From<NumberOutOfRange> for CordicError {
29 fn from(value: NumberOutOfRange) -> Self { 25 fn from(value: NumberOutOfRange) -> Self {
30 Self::NumberOutOfRange(value) 26 Self::NumberOutOfRange(value)
31 } 27 }
32} 28}
33 29
30impl From<ArgError> for CordicError {
31 fn from(value: ArgError) -> Self {
32 Self::ArgError(value)
33 }
34}
35
34#[cfg(feature = "defmt")] 36#[cfg(feature = "defmt")]
35impl defmt::Format for CordicError { 37impl defmt::Format for CordicError {
36 fn format(&self, fmt: defmt::Formatter) { 38 fn format(&self, fmt: defmt::Formatter) {
@@ -38,9 +40,10 @@ impl defmt::Format for CordicError {
38 40
39 match self { 41 match self {
40 ConfigError(e) => defmt::write!(fmt, "{}", e), 42 ConfigError(e) => defmt::write!(fmt, "{}", e),
41 ArgError(e) => defmt::write!(fmt, "{}", e), 43 ResultLengthNotEnough => defmt::write!(fmt, "Output buffer length is not long enough"),
44 ArgumentLengthIncorrect => defmt::write!(fmt, "Argument length incorrect"),
42 NumberOutOfRange(e) => defmt::write!(fmt, "{}", e), 45 NumberOutOfRange(e) => defmt::write!(fmt, "{}", e),
43 OutputLengthNotEnough => defmt::write!(fmt, "Output buffer length is not long enough"), 46 ArgError(e) => defmt::write!(fmt, "{}", e),
44 } 47 }
45 } 48 }
46} 49}
@@ -71,6 +74,26 @@ impl defmt::Format for ConfigError {
71 } 74 }
72} 75}
73 76
77/// Input value is out of range for Q1.x format
78#[allow(missing_docs)]
79#[derive(Debug)]
80pub enum NumberOutOfRange {
81 BelowLowerBound,
82 AboveUpperBound,
83}
84
85#[cfg(feature = "defmt")]
86impl defmt::Format for NumberOutOfRange {
87 fn format(&self, fmt: defmt::Formatter) {
88 use NumberOutOfRange::*;
89
90 match self {
91 BelowLowerBound => defmt::write!(fmt, "input value should be equal or greater than -1"),
92 AboveUpperBound => defmt::write!(fmt, "input value should be equal or less than 1"),
93 }
94 }
95}
96
74/// Error on checking input arguments 97/// Error on checking input arguments
75#[allow(dead_code)] 98#[allow(dead_code)]
76#[derive(Debug)] 99#[derive(Debug)]
@@ -119,23 +142,3 @@ pub(super) enum ArgType {
119 Arg1, 142 Arg1,
120 Arg2, 143 Arg2,
121} 144}
122
123/// Input value is out of range for Q1.x format
124#[allow(missing_docs)]
125#[derive(Debug)]
126pub enum NumberOutOfRange {
127 BelowLowerBound,
128 AboveUpperBound,
129}
130
131#[cfg(feature = "defmt")]
132impl defmt::Format for NumberOutOfRange {
133 fn format(&self, fmt: defmt::Formatter) {
134 use NumberOutOfRange::*;
135
136 match self {
137 BelowLowerBound => defmt::write!(fmt, "input value should be equal or greater than -1"),
138 AboveUpperBound => defmt::write!(fmt, "input value should be equal or less than 1"),
139 }
140 }
141}
diff --git a/embassy-stm32/src/cordic/mod.rs b/embassy-stm32/src/cordic/mod.rs
index f12efe2eb..2479e1b27 100644
--- a/embassy-stm32/src/cordic/mod.rs
+++ b/embassy-stm32/src/cordic/mod.rs
@@ -21,8 +21,6 @@ pub mod low_level {
21 pub use super::sealed::*; 21 pub use super::sealed::*;
22} 22}
23 23
24const INPUT_BUF_MAX_LEN: usize = 16;
25
26/// CORDIC driver 24/// CORDIC driver
27pub struct Cordic<'d, T: Instance> { 25pub struct Cordic<'d, T: Instance> {
28 peri: PeripheralRef<'d, T>, 26 peri: PeripheralRef<'d, T>,
@@ -38,17 +36,15 @@ pub struct Config {
38 function: Function, 36 function: Function,
39 precision: Precision, 37 precision: Precision,
40 scale: Scale, 38 scale: Scale,
41 res1_only: bool,
42} 39}
43 40
44impl Config { 41impl Config {
45 /// Create a config for Cordic driver 42 /// Create a config for Cordic driver
46 pub fn new(function: Function, precision: Precision, scale: Scale, res1_only: bool) -> Result<Self, CordicError> { 43 pub fn new(function: Function, precision: Precision, scale: Scale) -> Result<Self, CordicError> {
47 let config = Self { 44 let config = Self {
48 function, 45 function,
49 precision, 46 precision,
50 scale, 47 scale,
51 res1_only,
52 }; 48 };
53 49
54 config.check_scale()?; 50 config.check_scale()?;
@@ -117,54 +113,38 @@ impl<'d, T: Instance> Cordic<'d, T> {
117 self.peri.set_data_width(arg_width, res_width); 113 self.peri.set_data_width(arg_width, res_width);
118 } 114 }
119 115
120 fn reconfigure(&mut self) { 116 fn clean_rrdy_flag(&mut self) {
121 self.peri.set_func(self.config.function); 117 while self.peri.ready_to_read() {
122 self.peri.set_precision(self.config.precision); 118 self.peri.read_result();
123 self.peri.set_scale(self.config.scale); 119 }
124
125 // we don't set NRES in here, but to make sure NRES is set each time user call "calc"-ish functions,
126 // since each "calc"-ish functions can have different ARGSIZE and RESSIZE, thus NRES should be change accordingly.
127 } 120 }
128 121
129 async fn launch_a_dma_transfer( 122 /// Disable IRQ and DMA, clean RRDY, and set ARG2 to +1 (0x7FFFFFFF)
130 &mut self, 123 pub fn reconfigure(&mut self) {
131 write_dma: impl Peripheral<P = impl WriteDma<T>>, 124 // reset ARG2 to +1
132 read_dma: impl Peripheral<P = impl ReadDma<T>>, 125 {
133 input: &[u32], 126 self.peri.disable_irq();
134 output: &mut [u32],
135 ) {
136 into_ref!(write_dma, read_dma);
137
138 let write_req = write_dma.request();
139 let read_req = read_dma.request();
140
141 self.peri.enable_write_dma();
142 self.peri.enable_read_dma();
143
144 let _on_drop = OnDrop::new(|| {
145 self.peri.disable_write_dma();
146 self.peri.disable_read_dma(); 127 self.peri.disable_read_dma();
147 }); 128 self.peri.disable_write_dma();
148 129 self.clean_rrdy_flag();
149 unsafe {
150 let write_transfer = dma::Transfer::new_write(
151 &mut write_dma,
152 write_req,
153 input,
154 T::regs().wdata().as_ptr() as *mut _,
155 Default::default(),
156 );
157 130
158 let read_transfer = dma::Transfer::new_read( 131 self.peri.set_func(Function::Cos);
159 &mut read_dma, 132 self.peri.set_precision(Precision::Iters4);
160 read_req, 133 self.peri.set_scale(Scale::Arg1Res1);
161 T::regs().rdata().as_ptr() as *mut _, 134 self.peri.set_argument_count(AccessCount::Two);
162 output, 135 self.peri.set_data_width(Width::Bits32, Width::Bits32);
163 Default::default(), 136 self.peri.write_argument(0x0u32);
164 ); 137 self.peri.write_argument(0x7FFFFFFFu32);
165 138
166 embassy_futures::join::join(write_transfer, read_transfer).await; 139 self.clean_rrdy_flag();
167 } 140 }
141
142 self.peri.set_func(self.config.function);
143 self.peri.set_precision(self.config.precision);
144 self.peri.set_scale(self.config.scale);
145
146 // we don't set NRES in here, but to make sure NRES is set each time user call "calc"-ish functions,
147 // since each "calc"-ish functions can have different ARGSIZE and RESSIZE, thus NRES should be change accordingly.
168 } 148 }
169} 149}
170 150
@@ -176,311 +156,222 @@ impl<'d, T: Instance> Drop for Cordic<'d, T> {
176 156
177// q1.31 related 157// q1.31 related
178impl<'d, T: Instance> Cordic<'d, T> { 158impl<'d, T: Instance> Cordic<'d, T> {
179 /// Run a blocking CORDIC calculation in q1.31 format 159 /// Run a blocking CORDIC calculation in q1.31 format
160 ///
161 /// Notice:
162 /// If you set `arg1_only` to `true`, please be sure ARG2 value has been set to desired value before.
163 /// This function won't set ARG2 to +1 before or after each round of calculation.
164 /// If you want to make sure ARG2 is set to +1, consider run [.reconfigure()](Self::reconfigure).
180 pub fn blocking_calc_32bit( 165 pub fn blocking_calc_32bit(
181 &mut self, 166 &mut self,
182 arg1s: &[f64], 167 arg: &[u32],
183 arg2s: Option<&[f64]>, 168 res: &mut [u32],
184 output: &mut [f64], 169 arg1_only: bool,
170 res1_only: bool,
185 ) -> Result<usize, CordicError> { 171 ) -> Result<usize, CordicError> {
186 if arg1s.is_empty() { 172 if arg.is_empty() {
187 return Ok(0); 173 return Ok(0);
188 } 174 }
189 175
190 let output_length_enough = match self.config.res1_only { 176 let res_cnt = Self::check_arg_res_length_32bit(arg.len(), res.len(), arg1_only, res1_only)?;
191 true => output.len() >= arg1s.len(),
192 false => output.len() >= 2 * arg1s.len(),
193 };
194 177
195 if !output_length_enough { 178 self.peri
196 return Err(CordicError::OutputLengthNotEnough); 179 .set_argument_count(if arg1_only { AccessCount::One } else { AccessCount::Two });
197 }
198
199 self.check_input_f64(arg1s, arg2s)?;
200 180
201 self.peri.set_result_count(if self.config.res1_only { 181 self.peri
202 AccessCount::One 182 .set_result_count(if res1_only { AccessCount::One } else { AccessCount::Two });
203 } else {
204 AccessCount::Two
205 });
206 183
207 self.peri.set_data_width(Width::Bits32, Width::Bits32); 184 self.peri.set_data_width(Width::Bits32, Width::Bits32);
208 185
209 let mut output_count = 0; 186 let mut cnt = 0;
210
211 let mut consumed_input_len = 0;
212 187
213 // 188 match arg1_only {
214 // handle 2 input args calculation 189 true => {
215 // 190 // To use cordic preload function, the first value is special.
191 // It is loaded to CORDIC WDATA register out side of loop
192 let first_value = arg[0];
216 193
217 if arg2s.is_some() && !arg2s.unwrap().is_empty() { 194 // preload 1st value to CORDIC, to start the CORDIC calc
218 let arg2s = arg2s.unwrap(); 195 self.peri.write_argument(first_value);
219 196
220 self.peri.set_argument_count(AccessCount::Two); 197 for &arg1 in &arg[1..] {
198 // preload arg1 (for next calc)
199 self.peri.write_argument(arg1);
221 200
222 // Skip 1st value from arg1s, this value will be manually "preload" to cordic, to make use of cordic preload function. 201 // then read current result out
223 // And we preserve last value from arg2s, since it need to manually write to cordic, and read the result out. 202 res[cnt] = self.peri.read_result();
224 let double_input = arg1s.iter().skip(1).zip(&arg2s[..arg2s.len() - 1]); 203 cnt += 1;
225 // Since we preload 1st value from arg1s, the consumed input length is double_input length + 1. 204 if !res1_only {
226 consumed_input_len = double_input.len() + 1; 205 res[cnt] = self.peri.read_result();
227 206 cnt += 1;
228 // preload first value from arg1 to cordic 207 }
229 self.blocking_write_f64(arg1s[0])?; 208 }
230
231 for (&arg1, &arg2) in double_input {
232 // Since we manually preload a value before,
233 // we will write arg2 (from the actual last pair) first, (at this moment, cordic start to calculating,)
234 // and write arg1 (from the actual next pair), then read the result, to "keep preloading"
235 209
236 self.blocking_write_f64(arg2)?; 210 // read the last result
237 self.blocking_write_f64(arg1)?; 211 res[cnt] = self.peri.read_result();
238 self.blocking_read_f64_to_buf(output, &mut output_count); 212 cnt += 1;
213 if !res1_only {
214 res[cnt] = self.peri.read_result();
215 // cnt += 1;
216 }
239 } 217 }
218 false => {
219 // To use cordic preload function, the first and last value is special.
220 // They are load to CORDIC WDATA register out side of loop
221 let first_value = arg[0];
222 let last_value = arg[arg.len() - 1];
240 223
241 // write last input value from arg2s, then read out the result 224 let paired_args = &arg[1..arg.len() - 1];
242 self.blocking_write_f64(arg2s[arg2s.len() - 1])?;
243 self.blocking_read_f64_to_buf(output, &mut output_count);
244 }
245 225
246 // 226 // preload 1st value to CORDIC
247 // handle 1 input arg calculation 227 self.peri.write_argument(first_value);
248 //
249 228
250 let input_left = &arg1s[consumed_input_len..]; 229 for args in paired_args.chunks(2) {
230 let arg2 = args[0];
231 let arg1 = args[1];
251 232
252 if !input_left.is_empty() { 233 // load arg2 (for current calc) first, to start the CORDIC calc
253 self.peri.set_argument_count(AccessCount::One); 234 self.peri.write_argument(arg2);
254 235
255 // "preload" value to cordic (at this moment, cordic start to calculating) 236 // preload arg1 (for next calc)
256 self.blocking_write_f64(input_left[0])?; 237 self.peri.write_argument(arg1);
257 238
258 for &arg in input_left.iter().skip(1) { 239 // then read current result out
259 // this line write arg for next round calculation to cordic, 240 res[cnt] = self.peri.read_result();
260 // and read result from last round 241 cnt += 1;
261 self.blocking_write_f64(arg)?; 242 if !res1_only {
262 self.blocking_read_f64_to_buf(output, &mut output_count); 243 res[cnt] = self.peri.read_result();
263 } 244 cnt += 1;
245 }
246 }
264 247
265 // read the last output 248 // load last value to CORDIC, and finish the calculation
266 self.blocking_read_f64_to_buf(output, &mut output_count); 249 self.peri.write_argument(last_value);
250 res[cnt] = self.peri.read_result();
251 cnt += 1;
252 if !res1_only {
253 res[cnt] = self.peri.read_result();
254 // cnt += 1;
255 }
256 }
267 } 257 }
268 258
269 Ok(output_count) 259 // at this point cnt should be equal to res_cnt
270 }
271
272 fn blocking_read_f64_to_buf(&mut self, result_buf: &mut [f64], result_index: &mut usize) {
273 result_buf[*result_index] = utils::q1_31_to_f64(self.peri.read_result());
274 *result_index += 1;
275 260
276 // We don't care about whether the function return 1 or 2 results, 261 Ok(res_cnt)
277 // the only thing matter is whether user want 1 or 2 results.
278 if !self.config.res1_only {
279 result_buf[*result_index] = utils::q1_31_to_f64(self.peri.read_result());
280 *result_index += 1;
281 }
282 }
283
284 fn blocking_write_f64(&mut self, arg: f64) -> Result<(), NumberOutOfRange> {
285 self.peri.write_argument(utils::f64_to_q1_31(arg)?);
286 Ok(())
287 } 262 }
288 263
289 /// Run a async CORDIC calculation in q.1.31 format 264 /// Run a async CORDIC calculation in q.1.31 format
265 ///
266 /// Notice:
267 /// If you set `arg1_only` to `true`, please be sure ARG2 value has been set to desired value before.
268 /// This function won't set ARG2 to +1 before or after each round of calculation.
269 /// If you want to make sure ARG2 is set to +1, consider run [.reconfigure()](Self::reconfigure).
290 pub async fn async_calc_32bit( 270 pub async fn async_calc_32bit(
291 &mut self, 271 &mut self,
292 write_dma: impl Peripheral<P = impl WriteDma<T>>, 272 write_dma: impl Peripheral<P = impl WriteDma<T>>,
293 read_dma: impl Peripheral<P = impl ReadDma<T>>, 273 read_dma: impl Peripheral<P = impl ReadDma<T>>,
294 arg1s: &[f64], 274 arg: &[u32],
295 arg2s: Option<&[f64]>, 275 res: &mut [u32],
296 output: &mut [f64], 276 arg1_only: bool,
277 res1_only: bool,
297 ) -> Result<usize, CordicError> { 278 ) -> Result<usize, CordicError> {
298 if arg1s.is_empty() { 279 if arg.is_empty() {
299 return Ok(0); 280 return Ok(0);
300 } 281 }
301 282
302 let output_length_enough = match self.config.res1_only { 283 let res_cnt = Self::check_arg_res_length_32bit(arg.len(), res.len(), arg1_only, res1_only)?;
303 true => output.len() >= arg1s.len(),
304 false => output.len() >= 2 * arg1s.len(),
305 };
306 284
307 if !output_length_enough { 285 let active_res_buf = &mut res[..res_cnt];
308 return Err(CordicError::OutputLengthNotEnough);
309 }
310
311 self.check_input_f64(arg1s, arg2s)?;
312 286
313 into_ref!(write_dma, read_dma); 287 into_ref!(write_dma, read_dma);
314 288
315 self.peri.set_result_count(if self.config.res1_only { 289 self.peri
316 AccessCount::One 290 .set_argument_count(if arg1_only { AccessCount::One } else { AccessCount::Two });
317 } else {
318 AccessCount::Two
319 });
320
321 self.peri.set_data_width(Width::Bits32, Width::Bits32);
322 291
323 let mut output_count = 0; 292 self.peri
324 let mut consumed_input_len = 0; 293 .set_result_count(if res1_only { AccessCount::One } else { AccessCount::Two });
325 let mut input_buf = [0u32; INPUT_BUF_MAX_LEN];
326 let mut input_buf_len = 0;
327 294
328 // 295 self.peri.set_data_width(Width::Bits32, Width::Bits32);
329 // handle 2 input args calculation
330 //
331
332 if !arg2s.unwrap_or_default().is_empty() {
333 let arg2s = arg2s.unwrap();
334 296
335 self.peri.set_argument_count(AccessCount::Two); 297 let write_req = write_dma.request();
298 let read_req = read_dma.request();
336 299
337 let double_input = arg1s.iter().zip(arg2s); 300 self.peri.enable_write_dma();
301 self.peri.enable_read_dma();
338 302
339 consumed_input_len = double_input.len(); 303 let _on_drop = OnDrop::new(|| {
304 self.peri.disable_write_dma();
305 self.peri.disable_read_dma();
306 });
340 307
341 for (&arg1, &arg2) in double_input { 308 unsafe {
342 for &arg in [arg1, arg2].iter() { 309 let write_transfer = dma::Transfer::new_write(
343 input_buf[input_buf_len] = utils::f64_to_q1_31(arg)?; 310 &mut write_dma,
344 input_buf_len += 1; 311 write_req,
345 } 312 arg,
313 T::regs().wdata().as_ptr() as *mut _,
314 Default::default(),
315 );
346 316
347 if input_buf_len == INPUT_BUF_MAX_LEN { 317 let read_transfer = dma::Transfer::new_read(
348 self.inner_dma_calc_32bit( 318 &mut read_dma,
349 &mut write_dma, 319 read_req,
350 &mut read_dma, 320 T::regs().rdata().as_ptr() as *mut _,
351 true, 321 active_res_buf,
352 &input_buf[..input_buf_len], 322 Default::default(),
353 output, 323 );
354 &mut output_count,
355 )
356 .await;
357
358 input_buf_len = 0;
359 }
360 }
361 324
362 if input_buf_len > 0 { 325 embassy_futures::join::join(write_transfer, read_transfer).await;
363 self.inner_dma_calc_32bit(
364 &mut write_dma,
365 &mut read_dma,
366 true,
367 &input_buf[..input_buf_len],
368 output,
369 &mut output_count,
370 )
371 .await;
372
373 input_buf_len = 0;
374 }
375 } 326 }
376 327
377 // 328 Ok(res_cnt)
378 // handle 1 input arg calculation 329 }
379 //
380
381 if arg1s.len() > consumed_input_len {
382 let input_remain = &arg1s[consumed_input_len..];
383
384 self.peri.set_argument_count(AccessCount::One);
385
386 for &arg in input_remain {
387 input_buf[input_buf_len] = utils::f64_to_q1_31(arg)?;
388 input_buf_len += 1;
389
390 if input_buf_len == INPUT_BUF_MAX_LEN {
391 self.inner_dma_calc_32bit(
392 &mut write_dma,
393 &mut read_dma,
394 false,
395 &input_buf[..input_buf_len],
396 output,
397 &mut output_count,
398 )
399 .await;
400
401 input_buf_len = 0;
402 }
403 }
404 330
405 if input_buf_len > 0 { 331 fn check_arg_res_length_32bit(
406 self.inner_dma_calc_32bit( 332 arg_len: usize,
407 &mut write_dma, 333 res_len: usize,
408 &mut read_dma, 334 arg1_only: bool,
409 false, 335 res1_only: bool,
410 &input_buf[..input_buf_len], 336 ) -> Result<usize, CordicError> {
411 output, 337 if !arg1_only && arg_len % 2 != 0 {
412 &mut output_count, 338 return Err(CordicError::ArgumentLengthIncorrect);
413 )
414 .await;
415
416 // input_buf_len = 0;
417 }
418 } 339 }
419 340
420 Ok(output_count) 341 let mut minimal_res_length = arg_len;
421 }
422 342
423 // this function is highly coupled with async_calc_32bit, and is not intended to use in other place 343 if !res1_only {
424 async fn inner_dma_calc_32bit( 344 minimal_res_length *= 2;
425 &mut self,
426 write_dma: impl Peripheral<P = impl WriteDma<T>>,
427 read_dma: impl Peripheral<P = impl ReadDma<T>>,
428 double_input: bool, // gether extra info to calc output_buf size
429 input_buf: &[u32], // input_buf, its content should be exact length for calculation
430 output: &mut [f64], // caller should uses this buf as a final output array
431 output_start_index: &mut usize, // the index of start point of the output for this round of calculation
432 ) {
433 // output_buf is the place to store raw value from CORDIC (via DMA).
434 // For buf size, we assume in this round of calculation:
435 // all input is 1 arg, and all calculation need 2 output,
436 // thus output_buf will always be long enough.
437 let mut output_buf = [0u32; INPUT_BUF_MAX_LEN * 2];
438
439 let mut output_buf_size = input_buf.len();
440 if !self.config.res1_only {
441 // if we need 2 result for 1 input, then output_buf length should be 2x long.
442 output_buf_size *= 2;
443 };
444 if double_input {
445 // if input itself is 2 args for 1 calculation, then output_buf length should be /2.
446 output_buf_size /= 2;
447 } 345 }
448 346
449 let active_output_buf = &mut output_buf[..output_buf_size]; 347 if !arg1_only {
450 348 minimal_res_length /= 2
451 self.launch_a_dma_transfer(write_dma, read_dma, input_buf, active_output_buf) 349 }
452 .await;
453 350
454 for &mut output_u32 in active_output_buf { 351 if minimal_res_length > res_len {
455 output[*output_start_index] = utils::q1_31_to_f64(output_u32); 352 return Err(CordicError::ResultLengthNotEnough);
456 *output_start_index += 1;
457 } 353 }
354
355 Ok(minimal_res_length)
458 } 356 }
459} 357}
460 358
461// q1.15 related 359// q1.15 related
462impl<'d, T: Instance> Cordic<'d, T> { 360impl<'d, T: Instance> Cordic<'d, T> {
463 /// Run a blocking CORDIC calculation in q1.15 format 361 /// Run a blocking CORDIC calculation in q1.15 format
464 pub fn blocking_calc_16bit( 362 ///
465 &mut self, 363 /// Notice::
466 arg1s: &[f32], 364 /// User will take respond to merge two u16 arguments into one u32 data, and/or split one u32 data into two u16 results.
467 arg2s: Option<&[f32]>, 365 pub fn blocking_calc_16bit(&mut self, arg: &[u32], res: &mut [u32]) -> Result<usize, CordicError> {
468 output: &mut [f32], 366 if arg.is_empty() {
469 ) -> Result<usize, CordicError> {
470 if arg1s.is_empty() {
471 return Ok(0); 367 return Ok(0);
472 } 368 }
473 369
474 let output_length_enough = match self.config.res1_only { 370 if arg.len() > res.len() {
475 true => output.len() >= arg1s.len(), 371 return Err(CordicError::ResultLengthNotEnough);
476 false => output.len() >= 2 * arg1s.len(),
477 };
478
479 if !output_length_enough {
480 return Err(CordicError::OutputLengthNotEnough);
481 } 372 }
482 373
483 self.check_input_f32(arg1s, arg2s)?; 374 let res_cnt = arg.len();
484 375
485 // In q1.15 mode, 1 write/read to access 2 arguments/results 376 // In q1.15 mode, 1 write/read to access 2 arguments/results
486 self.peri.set_argument_count(AccessCount::One); 377 self.peri.set_argument_count(AccessCount::One);
@@ -488,83 +379,53 @@ impl<'d, T: Instance> Cordic<'d, T> {
488 379
489 self.peri.set_data_width(Width::Bits16, Width::Bits16); 380 self.peri.set_data_width(Width::Bits16, Width::Bits16);
490 381
491 let mut output_count = 0; 382 // To use cordic preload function, the first value is special.
492 383 // It is loaded to CORDIC WDATA register out side of loop
493 // In q1.15 mode, we always fill 1 pair of 16bit value into WDATA register. 384 let first_value = arg[0];
494 // If arg2s is None or empty array, we assume arg2 value always 1.0 (as reset value for ARG2).
495 // If arg2s has some value, and but not as long as arg1s,
496 // we fill the reset of arg2 values with last value from arg2s (as q1.31 version does)
497 385
498 let arg2_default_value = match arg2s { 386 // preload 1st value to CORDIC, to start the CORDIC calc
499 Some(arg2s) if !arg2s.is_empty() => arg2s[arg2s.len() - 1], 387 self.peri.write_argument(first_value);
500 _ => 1.0,
501 };
502
503 let mut args = arg1s.iter().zip(
504 arg2s
505 .unwrap_or(&[])
506 .iter()
507 .chain(core::iter::repeat(&arg2_default_value)),
508 );
509 388
510 let (&arg1, &arg2) = args.next().unwrap(); 389 let mut cnt = 0;
511 390
512 // preloading 1 pair of arguments 391 for &arg_val in &arg[1..] {
513 self.blocking_write_f32(arg1, arg2)?; 392 // preload arg_val (for next calc)
393 self.peri.write_argument(arg_val);
514 394
515 for (&arg1, &arg2) in args { 395 // then read current result out
516 self.blocking_write_f32(arg1, arg2)?; 396 res[cnt] = self.peri.read_result();
517 self.blocking_read_f32_to_buf(output, &mut output_count); 397 cnt += 1;
518 } 398 }
519 399
520 // read last pair of value from cordic 400 // read last result out
521 self.blocking_read_f32_to_buf(output, &mut output_count); 401 res[cnt] = self.peri.read_result();
402 // cnt += 1;
522 403
523 Ok(output_count) 404 Ok(res_cnt)
524 } 405 }
525 406
526 fn blocking_write_f32(&mut self, arg1: f32, arg2: f32) -> Result<(), NumberOutOfRange> { 407 /// Run a async CORDIC calculation in q1.15 format
527 self.peri.write_argument(utils::f32_args_to_u32(arg1, arg2)?); 408 ///
528 Ok(()) 409 /// Notice::
529 } 410 /// User will take respond to merge two u16 arguments into one u32 data, and/or split one u32 data into two u16 results.
530
531 fn blocking_read_f32_to_buf(&mut self, result_buf: &mut [f32], result_index: &mut usize) {
532 let (res1, res2) = utils::u32_to_f32_res(self.peri.read_result());
533
534 result_buf[*result_index] = res1;
535 *result_index += 1;
536
537 // We don't care about whether the function return 1 or 2 results,
538 // the only thing matter is whether user want 1 or 2 results.
539 if !self.config.res1_only {
540 result_buf[*result_index] = res2;
541 *result_index += 1;
542 }
543 }
544
545 /// Run a async CORDIC calculation in q1.15 format
546 pub async fn async_calc_16bit( 411 pub async fn async_calc_16bit(
547 &mut self, 412 &mut self,
548 write_dma: impl Peripheral<P = impl WriteDma<T>>, 413 write_dma: impl Peripheral<P = impl WriteDma<T>>,
549 read_dma: impl Peripheral<P = impl ReadDma<T>>, 414 read_dma: impl Peripheral<P = impl ReadDma<T>>,
550 arg1s: &[f32], 415 arg: &[u32],
551 arg2s: Option<&[f32]>, 416 res: &mut [u32],
552 output: &mut [f32],
553 ) -> Result<usize, CordicError> { 417 ) -> Result<usize, CordicError> {
554 if arg1s.is_empty() { 418 if arg.is_empty() {
555 return Ok(0); 419 return Ok(0);
556 } 420 }
557 421
558 let output_length_enough = match self.config.res1_only { 422 if arg.len() > res.len() {
559 true => output.len() >= arg1s.len(), 423 return Err(CordicError::ResultLengthNotEnough);
560 false => output.len() >= 2 * arg1s.len(),
561 };
562
563 if !output_length_enough {
564 return Err(CordicError::OutputLengthNotEnough);
565 } 424 }
566 425
567 self.check_input_f32(arg1s, arg2s)?; 426 let res_cnt = arg.len();
427
428 let active_res_buf = &mut res[..res_cnt];
568 429
569 into_ref!(write_dma, read_dma); 430 into_ref!(write_dma, read_dma);
570 431
@@ -574,142 +435,96 @@ impl<'d, T: Instance> Cordic<'d, T> {
574 435
575 self.peri.set_data_width(Width::Bits16, Width::Bits16); 436 self.peri.set_data_width(Width::Bits16, Width::Bits16);
576 437
577 let mut output_count = 0; 438 let write_req = write_dma.request();
578 let mut input_buf = [0u32; INPUT_BUF_MAX_LEN]; 439 let read_req = read_dma.request();
579 let mut input_buf_len = 0;
580
581 // In q1.15 mode, we always fill 1 pair of 16bit value into WDATA register.
582 // If arg2s is None or empty array, we assume arg2 value always 1.0 (as reset value for ARG2).
583 // If arg2s has some value, and but not as long as arg1s,
584 // we fill the reset of arg2 values with last value from arg2s (as CORDIC behavior on q1.31 format)
585
586 let arg2_default_value = match arg2s {
587 Some(arg2s) if !arg2s.is_empty() => arg2s[arg2s.len() - 1],
588 _ => 1.0,
589 };
590
591 let args = arg1s.iter().zip(
592 arg2s
593 .unwrap_or(&[])
594 .iter()
595 .chain(core::iter::repeat(&arg2_default_value)),
596 );
597 440
598 for (&arg1, &arg2) in args { 441 self.peri.enable_write_dma();
599 input_buf[input_buf_len] = utils::f32_args_to_u32(arg1, arg2)?; 442 self.peri.enable_read_dma();
600 input_buf_len += 1;
601 443
602 if input_buf_len == INPUT_BUF_MAX_LEN { 444 let _on_drop = OnDrop::new(|| {
603 self.inner_dma_calc_16bit(&mut write_dma, &mut read_dma, &input_buf, output, &mut output_count) 445 self.peri.disable_write_dma();
604 .await; 446 self.peri.disable_read_dma();
605 } 447 });
606 }
607 448
608 if input_buf_len > 0 { 449 unsafe {
609 self.inner_dma_calc_16bit( 450 let write_transfer = dma::Transfer::new_write(
610 &mut write_dma, 451 &mut write_dma,
611 &mut read_dma, 452 write_req,
612 &input_buf[..input_buf_len], 453 arg,
613 output, 454 T::regs().wdata().as_ptr() as *mut _,
614 &mut output_count, 455 Default::default(),
615 ) 456 );
616 .await;
617 }
618
619 Ok(output_count)
620 }
621
622 // this function is highly coupled with async_calc_16bit, and is not intended to use in other place
623 async fn inner_dma_calc_16bit(
624 &mut self,
625 write_dma: impl Peripheral<P = impl WriteDma<T>>,
626 read_dma: impl Peripheral<P = impl ReadDma<T>>,
627 input_buf: &[u32], // input_buf, its content should be exact length for calculation
628 output: &mut [f32], // caller should uses this buf as a final output array
629 output_start_index: &mut usize, // the index of start point of the output for this round of calculation
630 ) {
631 // output_buf is the place to store raw value from CORDIC (via DMA).
632 let mut output_buf = [0u32; INPUT_BUF_MAX_LEN];
633
634 let active_output_buf = &mut output_buf[..input_buf.len()];
635
636 self.launch_a_dma_transfer(write_dma, read_dma, input_buf, active_output_buf)
637 .await;
638
639 for &mut output_u32 in active_output_buf {
640 let (res1, res2) = utils::u32_to_f32_res(output_u32);
641 457
642 output[*output_start_index] = res1; 458 let read_transfer = dma::Transfer::new_read(
643 *output_start_index += 1; 459 &mut read_dma,
460 read_req,
461 T::regs().rdata().as_ptr() as *mut _,
462 active_res_buf,
463 Default::default(),
464 );
644 465
645 if !self.config.res1_only { 466 embassy_futures::join::join(write_transfer, read_transfer).await;
646 output[*output_start_index] = res2;
647 *output_start_index += 1;
648 }
649 } 467 }
468
469 Ok(res_cnt)
650 } 470 }
651} 471}
652 472
653// check input value ARG1, ARG2, SCALE and FUNCTION are compatible with each other 473macro_rules! check_arg_value {
654macro_rules! check_input_value { 474 ($func_arg1_name:ident, $func_arg2_name:ident, $float_type:ty) => {
655 ($func_name:ident, $float_type:ty) => {
656 impl<'d, T: Instance> Cordic<'d, T> { 475 impl<'d, T: Instance> Cordic<'d, T> {
657 fn $func_name(&self, arg1s: &[$float_type], arg2s: Option<&[$float_type]>) -> Result<(), ArgError> { 476 /// check input value ARG1, SCALE and FUNCTION are compatible with each other
477 pub fn $func_arg1_name(&self, arg: $float_type) -> Result<(), ArgError> {
658 let config = &self.config; 478 let config = &self.config;
659 479
660 use Function::*; 480 use Function::*;
661 481
662 struct Arg1ErrInfo { 482 struct Arg1ErrInfo {
663 scale: Option<Scale>, 483 scale: Option<Scale>,
664 range: [f32; 2], 484 range: [f32; 2], // f32 is ok, it only used in error display
665 inclusive_upper_bound: bool, 485 inclusive_upper_bound: bool,
666 } 486 }
667 487
668 // check ARG1 value
669 let err_info = match config.function { 488 let err_info = match config.function {
670 Cos | Sin | Phase | Modulus | Arctan if arg1s.iter().any(|v| !(-1.0..=1.0).contains(v)) => { 489 Cos | Sin | Phase | Modulus | Arctan if !(-1.0..=1.0).contains(arg) => Some(Arg1ErrInfo {
671 Some(Arg1ErrInfo { 490 scale: None,
672 scale: None, 491 range: [-1.0, 1.0],
673 range: [-1.0, 1.0], 492 inclusive_upper_bound: true,
674 inclusive_upper_bound: true, 493 }),
675 })
676 }
677 494
678 Cosh | Sinh if arg1s.iter().any(|v| !(-0.559..=0.559).contains(v)) => Some(Arg1ErrInfo { 495 Cosh | Sinh if !(-0.559..=0.559).contains(arg) => Some(Arg1ErrInfo {
679 scale: None, 496 scale: None,
680 range: [-0.559, 0.559], 497 range: [-0.559, 0.559],
681 inclusive_upper_bound: true, 498 inclusive_upper_bound: true,
682 }), 499 }),
683 500
684 Arctanh if arg1s.iter().any(|v| !(-0.403..=0.403).contains(v)) => Some(Arg1ErrInfo { 501 Arctanh if !(-0.403..=0.403).contains(arg) => Some(Arg1ErrInfo {
685 scale: None, 502 scale: None,
686 range: [-0.403, 0.403], 503 range: [-0.403, 0.403],
687 inclusive_upper_bound: true, 504 inclusive_upper_bound: true,
688 }), 505 }),
689 506
690 Ln => match config.scale { 507 Ln => match config.scale {
691 Scale::Arg1o2Res2 if arg1s.iter().any(|v| !(0.0535..0.5).contains(v)) => Some(Arg1ErrInfo { 508 Scale::Arg1o2Res2 if !(0.0535..0.5).contains(arg) => Some(Arg1ErrInfo {
692 scale: Some(Scale::Arg1o2Res2), 509 scale: Some(Scale::Arg1o2Res2),
693 range: [0.0535, 0.5], 510 range: [0.0535, 0.5],
694 inclusive_upper_bound: false, 511 inclusive_upper_bound: false,
695 }), 512 }),
696 Scale::Arg1o4Res4 if arg1s.iter().any(|v| !(0.25..0.75).contains(v)) => Some(Arg1ErrInfo { 513 Scale::Arg1o4Res4 if !(0.25..0.75).contains(arg) => Some(Arg1ErrInfo {
697 scale: Some(Scale::Arg1o4Res4), 514 scale: Some(Scale::Arg1o4Res4),
698 range: [0.25, 0.75], 515 range: [0.25, 0.75],
699 inclusive_upper_bound: false, 516 inclusive_upper_bound: false,
700 }), 517 }),
701 Scale::Arg1o8Res8 if arg1s.iter().any(|v| !(0.375..0.875).contains(v)) => Some(Arg1ErrInfo { 518 Scale::Arg1o8Res8 if !(0.375..0.875).contains(arg) => Some(Arg1ErrInfo {
702 scale: Some(Scale::Arg1o8Res8), 519 scale: Some(Scale::Arg1o8Res8),
703 range: [0.375, 0.875], 520 range: [0.375, 0.875],
704 inclusive_upper_bound: false, 521 inclusive_upper_bound: false,
705 }), 522 }),
706 Scale::Arg1o16Res16 if arg1s.iter().any(|v| !(0.4375..0.584).contains(v)) => { 523 Scale::Arg1o16Res16 if !(0.4375..0.584).contains(arg) => Some(Arg1ErrInfo {
707 Some(Arg1ErrInfo { 524 scale: Some(Scale::Arg1o16Res16),
708 scale: Some(Scale::Arg1o16Res16), 525 range: [0.4375, 0.584],
709 range: [0.4375, 0.584], 526 inclusive_upper_bound: false,
710 inclusive_upper_bound: false, 527 }),
711 })
712 }
713 528
714 Scale::Arg1o2Res2 | Scale::Arg1o4Res4 | Scale::Arg1o8Res8 | Scale::Arg1o16Res16 => None, 529 Scale::Arg1o2Res2 | Scale::Arg1o4Res4 | Scale::Arg1o8Res8 | Scale::Arg1o16Res16 => None,
715 530
@@ -717,17 +532,17 @@ macro_rules! check_input_value {
717 }, 532 },
718 533
719 Sqrt => match config.scale { 534 Sqrt => match config.scale {
720 Scale::Arg1Res1 if arg1s.iter().any(|v| !(0.027..0.75).contains(v)) => Some(Arg1ErrInfo { 535 Scale::Arg1Res1 if !(0.027..0.75).contains(arg) => Some(Arg1ErrInfo {
721 scale: Some(Scale::Arg1Res1), 536 scale: Some(Scale::Arg1Res1),
722 range: [0.027, 0.75], 537 range: [0.027, 0.75],
723 inclusive_upper_bound: false, 538 inclusive_upper_bound: false,
724 }), 539 }),
725 Scale::Arg1o2Res2 if arg1s.iter().any(|v| !(0.375..0.875).contains(v)) => Some(Arg1ErrInfo { 540 Scale::Arg1o2Res2 if !(0.375..0.875).contains(arg) => Some(Arg1ErrInfo {
726 scale: Some(Scale::Arg1o2Res2), 541 scale: Some(Scale::Arg1o2Res2),
727 range: [0.375, 0.875], 542 range: [0.375, 0.875],
728 inclusive_upper_bound: false, 543 inclusive_upper_bound: false,
729 }), 544 }),
730 Scale::Arg1o4Res4 if arg1s.iter().any(|v| !(0.4375..0.584).contains(v)) => Some(Arg1ErrInfo { 545 Scale::Arg1o4Res4 if !(0.4375..0.584).contains(arg) => Some(Arg1ErrInfo {
731 scale: Some(Scale::Arg1o4Res4), 546 scale: Some(Scale::Arg1o4Res4),
732 range: [0.4375, 0.584], 547 range: [0.4375, 0.584],
733 inclusive_upper_bound: false, 548 inclusive_upper_bound: false,
@@ -749,33 +564,35 @@ macro_rules! check_input_value {
749 }); 564 });
750 } 565 }
751 566
752 // check ARG2 value 567 Ok(())
753 if let Some(arg2s) = arg2s { 568 }
754 struct Arg2ErrInfo {
755 range: [f32; 2],
756 }
757 569
758 let err_info = match config.function { 570 /// check input value ARG2 and FUNCTION are compatible with each other
759 Cos | Sin if arg2s.iter().any(|v| !(0.0..=1.0).contains(v)) => { 571 pub fn $func_arg2_name(&self, arg: $float_type) -> Result<(), ArgError> {
760 Some(Arg2ErrInfo { range: [0.0, 1.0] }) 572 let config = &self.config;
761 } 573
762 574 use Function::*;
763 Phase | Modulus if arg2s.iter().any(|v| !(-1.0..=1.0).contains(v)) => { 575
764 Some(Arg2ErrInfo { range: [-1.0, 1.0] }) 576 struct Arg2ErrInfo {
765 } 577 range: [f32; 2], // f32 is ok, it only used in error display
766 578 }
767 Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh | Ln | Sqrt => None, 579
768 }; 580 let err_info = match config.function {
769 581 Cos | Sin if !(0.0..=1.0).contains(arg) => Some(Arg2ErrInfo { range: [0.0, 1.0] }),
770 if let Some(err) = err_info { 582
771 return Err(ArgError { 583 Phase | Modulus if !(-1.0..=1.0).contains(arg) => Some(Arg2ErrInfo { range: [-1.0, 1.0] }),
772 func: config.function, 584
773 scale: None, 585 Cos | Sin | Phase | Modulus | Arctan | Cosh | Sinh | Arctanh | Ln | Sqrt => None,
774 arg_range: err.range, 586 };
775 inclusive_upper_bound: true, 587
776 arg_type: ArgType::Arg2, 588 if let Some(err) = err_info {
777 }); 589 return Err(ArgError {
778 } 590 func: config.function,
591 scale: None,
592 arg_range: err.range,
593 inclusive_upper_bound: true,
594 arg_type: ArgType::Arg2,
595 });
779 } 596 }
780 597
781 Ok(()) 598 Ok(())
@@ -784,8 +601,8 @@ macro_rules! check_input_value {
784 }; 601 };
785} 602}
786 603
787check_input_value!(check_input_f64, f64); 604check_arg_value!(check_f64_arg1, check_f64_arg2, &f64);
788check_input_value!(check_input_f32, f32); 605check_arg_value!(check_f32_arg1, check_f32_arg2, &f32);
789 606
790foreach_interrupt!( 607foreach_interrupt!(
791 ($inst:ident, cordic, $block:ident, GLOBAL, $irq:ident) => { 608 ($inst:ident, cordic, $block:ident, GLOBAL, $irq:ident) => {
diff --git a/embassy-stm32/src/cordic/utils.rs b/embassy-stm32/src/cordic/utils.rs
index 41821d6e2..008f50270 100644
--- a/embassy-stm32/src/cordic/utils.rs
+++ b/embassy-stm32/src/cordic/utils.rs
@@ -1,4 +1,4 @@
1//! Common match utils 1//! Common math utils
2use super::errors::NumberOutOfRange; 2use super::errors::NumberOutOfRange;
3 3
4macro_rules! floating_fixed_convert { 4macro_rules! floating_fixed_convert {
@@ -60,16 +60,3 @@ floating_fixed_convert!(
60 15, 60 15,
61 0x3800_0000u32 // binary form of 2f32^(-15) 61 0x3800_0000u32 // binary form of 2f32^(-15)
62); 62);
63
64#[inline(always)]
65pub(crate) fn f32_args_to_u32(arg1: f32, arg2: f32) -> Result<u32, NumberOutOfRange> {
66 Ok(f32_to_q1_15(arg1)? as u32 + ((f32_to_q1_15(arg2)? as u32) << 16))
67}
68
69#[inline(always)]
70pub(crate) fn u32_to_f32_res(reg_value: u32) -> (f32, f32) {
71 let res1 = q1_15_to_f32((reg_value & ((1u32 << 16) - 1)) as u16);
72 let res2 = q1_15_to_f32((reg_value >> 16) as u16);
73
74 (res1, res2)
75}
diff --git a/examples/stm32h5/src/bin/cordic.rs b/examples/stm32h5/src/bin/cordic.rs
index d49f75b8f..73e873574 100644
--- a/examples/stm32h5/src/bin/cordic.rs
+++ b/examples/stm32h5/src/bin/cordic.rs
@@ -3,7 +3,7 @@
3 3
4use defmt::*; 4use defmt::*;
5use embassy_executor::Spawner; 5use embassy_executor::Spawner;
6use embassy_stm32::cordic; 6use embassy_stm32::cordic::{self, utils};
7use {defmt_rtt as _, panic_probe as _}; 7use {defmt_rtt as _, panic_probe as _};
8 8
9#[embassy_executor::main] 9#[embassy_executor::main]
@@ -16,20 +16,63 @@ async fn main(_spawner: Spawner) {
16 cordic::Function::Sin, 16 cordic::Function::Sin,
17 Default::default(), 17 Default::default(),
18 Default::default(), 18 Default::default(),
19 false,
20 )), 19 )),
21 ); 20 );
22 21
23 let mut output = [0f64; 16]; 22 // for output buf, the length is not that strict, larger than minimal required is ok.
23 let mut output_f64 = [0f64; 19];
24 let mut output_u32 = [0u32; 21];
24 25
25 let arg1 = [1.0, 0.0, -1.0]; // for trigonometric function, the ARG1 value [-pi, pi] should be map to [-1, 1] 26 // tips:
26 let arg2 = [0.5, 1.0]; 27 // CORDIC peripheral has strict requirements on input values; you can also use the ".check_fXX_argX()" methods
28 // to make sure your input values are compatible with current CORDIC setup.
29 let arg1 = [-1.0, -0.5, 0.0, 0.5, 1.0]; // for trigonometric functions, the ARG1 value [-pi, pi] should be mapped to [-1, 1]
30 let arg2 = [0.5]; // and for Sin function, ARG2 should be in [0, 1]
27 31
28 let cnt = unwrap!( 32 let mut input_buf = [0u32; 9];
33
34 // convert input from floating point to fixed point
35 input_buf[0] = unwrap!(utils::f64_to_q1_31(arg1[0]));
36 input_buf[1] = unwrap!(utils::f64_to_q1_31(arg2[0]));
37
38 // If input length is small, blocking mode can be used to minimize overhead.
39 let cnt0 = unwrap!(cordic.blocking_calc_32bit(
40 &input_buf[..2], // input length is strict, since the driver uses its length to determine the calculation count
41 &mut output_u32,
42 false,
43 false
44 ));
45
46 // convert result from fixed point into floating point
47 for (&u32_val, f64_val) in output_u32[..cnt0].iter().zip(output_f64.iter_mut()) {
48 *f64_val = utils::q1_31_to_f64(u32_val);
49 }
50
51 // convert input from floating point to fixed point
52 //
53 // the first value of arg1 has already been used, so continue from arg1[1..]
54 for (&f64_val, u32_val) in arg1[1..].iter().zip(input_buf.iter_mut()) {
55 *u32_val = unwrap!(utils::f64_to_q1_31(f64_val));
56 }
57
58 // If calculation is a little longer, async mode can make use of DMA, and let core do some other stuff.
59 let cnt1 = unwrap!(
29 cordic 60 cordic
30 .async_calc_32bit(&mut dp.GPDMA1_CH0, &mut dp.GPDMA1_CH1, &arg1, Some(&arg2), &mut output,) 61 .async_calc_32bit(
62 &mut dp.GPDMA1_CH0,
63 &mut dp.GPDMA1_CH1,
64 &input_buf[..arg1.len() - 1], // limit input buf to its actual length
65 &mut output_u32,
66 true,
67 false
68 )
31 .await 69 .await
32 ); 70 );
33 71
34 println!("async calc 32bit: {}", output[..cnt]); 72 // convert result from fixed point into floating point
73 for (&u32_val, f64_val) in output_u32[..cnt1].iter().zip(output_f64[cnt0..cnt0 + cnt1].iter_mut()) {
74 *f64_val = utils::q1_31_to_f64(u32_val);
75 }
76
77 println!("result: {}", output_f64[..cnt0 + cnt1]);
35} 78}
diff --git a/tests/stm32/src/bin/cordic.rs b/tests/stm32/src/bin/cordic.rs
index cd2e9d6f7..669fd96ab 100644
--- a/tests/stm32/src/bin/cordic.rs
+++ b/tests/stm32/src/bin/cordic.rs
@@ -14,6 +14,7 @@
14mod common; 14mod common;
15use common::*; 15use common::*;
16use embassy_executor::Spawner; 16use embassy_executor::Spawner;
17use embassy_stm32::cordic::utils;
17use embassy_stm32::{bind_interrupts, cordic, peripherals, rng}; 18use embassy_stm32::{bind_interrupts, cordic, peripherals, rng};
18use num_traits::Float; 19use num_traits::Float;
19use {defmt_rtt as _, panic_probe as _}; 20use {defmt_rtt as _, panic_probe as _};
@@ -24,11 +25,12 @@ bind_interrupts!(struct Irqs {
24 25
25/* input value control, can be changed */ 26/* input value control, can be changed */
26 27
27const ARG1_LENGTH: usize = 9; 28const INPUT_U32_COUNT: usize = 9;
28const ARG2_LENGTH: usize = 4; // this might not be the exact length of ARG2, since ARG2 need to be inside [0, 1] 29const INPUT_U8_COUNT: usize = 4 * INPUT_U32_COUNT;
29 30
30const INPUT_Q1_31_LENGTH: usize = ARG1_LENGTH + ARG2_LENGTH; 31// Assume the first calculation needs 2 arguments, and the rest need 1 argument each.
31const INPUT_U8_LENGTH: usize = 4 * INPUT_Q1_31_LENGTH; 32// And all calculation generate 2 results.
33const OUTPUT_LENGTH: usize = (INPUT_U32_COUNT - 1) * 2;
32 34
33#[embassy_executor::main] 35#[embassy_executor::main]
34async fn main(_spawner: Spawner) { 36async fn main(_spawner: Spawner) {
@@ -42,43 +44,28 @@ async fn main(_spawner: Spawner) {
42 44
43 let mut rng = rng::Rng::new(dp.RNG, Irqs); 45 let mut rng = rng::Rng::new(dp.RNG, Irqs);
44 46
45 let mut input_buf_u8 = [0u8; INPUT_U8_LENGTH]; 47 let mut input_buf_u8 = [0u8; INPUT_U8_COUNT];
46 defmt::unwrap!(rng.async_fill_bytes(&mut input_buf_u8).await); 48 defmt::unwrap!(rng.async_fill_bytes(&mut input_buf_u8).await);
47 49
48 // convert every [u8; 4] to a u32, for a Q1.31 value 50 // convert every [u8; 4] to a u32, for a Q1.31 value
49 let input_q1_31 = unsafe { core::mem::transmute::<[u8; INPUT_U8_LENGTH], [u32; INPUT_Q1_31_LENGTH]>(input_buf_u8) }; 51 let mut input_q1_31 = unsafe { core::mem::transmute::<[u8; INPUT_U8_COUNT], [u32; INPUT_U32_COUNT]>(input_buf_u8) };
50 52
51 let mut input_f64_buf = [0f64; INPUT_Q1_31_LENGTH]; 53 // ARG2 for Sin function should be inside [0, 1], set MSB to 0 of a Q1.31 value, will make sure it's no less than 0.
54 input_q1_31[1] &= !(1u32 << 31);
52 55
53 let mut cordic_output_f64_buf = [0f64; ARG1_LENGTH * 2]; 56 //
54 57 // CORDIC calculation
55 // convert Q1.31 value back to f64, for software calculation verify 58 //
56 for (val_u32, val_f64) in input_q1_31.iter().zip(input_f64_buf.iter_mut()) {
57 *val_f64 = cordic::utils::q1_31_to_f64(*val_u32);
58 }
59
60 let mut arg2_f64_buf = [0f64; ARG2_LENGTH];
61 let mut arg2_f64_len = 0;
62
63 // check if ARG2 is in range [0, 1] (limited by CORDIC peripheral with Sin mode)
64 for &arg2 in &input_f64_buf[ARG1_LENGTH..] {
65 if arg2 >= 0.0 {
66 arg2_f64_buf[arg2_f64_len] = arg2;
67 arg2_f64_len += 1;
68 }
69 }
70 59
71 // the actual value feed to CORDIC 60 let mut output_q1_31 = [0u32; OUTPUT_LENGTH];
72 let arg1_f64_ls = &input_f64_buf[..ARG1_LENGTH];
73 let arg2_f64_ls = &arg2_f64_buf[..arg2_f64_len];
74 61
62 // setup Cordic driver
75 let mut cordic = cordic::Cordic::new( 63 let mut cordic = cordic::Cordic::new(
76 dp.CORDIC, 64 dp.CORDIC,
77 defmt::unwrap!(cordic::Config::new( 65 defmt::unwrap!(cordic::Config::new(
78 cordic::Function::Sin, 66 cordic::Function::Sin,
79 Default::default(), 67 Default::default(),
80 Default::default(), 68 Default::default(),
81 false,
82 )), 69 )),
83 ); 70 );
84 71
@@ -88,67 +75,66 @@ async fn main(_spawner: Spawner) {
88 #[cfg(any(feature = "stm32h563zi", feature = "stm32u585ai", feature = "stm32u5a5zj"))] 75 #[cfg(any(feature = "stm32h563zi", feature = "stm32u585ai", feature = "stm32u5a5zj"))]
89 let (mut write_dma, mut read_dma) = (dp.GPDMA1_CH4, dp.GPDMA1_CH5); 76 let (mut write_dma, mut read_dma) = (dp.GPDMA1_CH4, dp.GPDMA1_CH5);
90 77
91 let cordic_start_point = embassy_time::Instant::now(); 78 // calculate first result using blocking mode
79 let cnt0 = defmt::unwrap!(cordic.blocking_calc_32bit(&input_q1_31[..2], &mut output_q1_31, false, false));
92 80
93 let cnt = unwrap!( 81 // calculate rest results using async mode
82 let cnt1 = defmt::unwrap!(
94 cordic 83 cordic
95 .async_calc_32bit( 84 .async_calc_32bit(
96 &mut write_dma, 85 &mut write_dma,
97 &mut read_dma, 86 &mut read_dma,
98 arg1_f64_ls, 87 &input_q1_31[2..],
99 Some(arg2_f64_ls), 88 &mut output_q1_31[cnt0..],
100 &mut cordic_output_f64_buf, 89 true,
90 false,
101 ) 91 )
102 .await 92 .await
103 ); 93 );
104 94
105 let cordic_end_point = embassy_time::Instant::now(); 95 // all output value length should be the same as our output buffer size
96 defmt::assert_eq!(cnt0 + cnt1, output_q1_31.len());
97
98 let mut cordic_result_f64 = [0.0f64; OUTPUT_LENGTH];
99
100 for (f64_val, u32_val) in cordic_result_f64.iter_mut().zip(output_q1_31) {
101 *f64_val = utils::q1_31_to_f64(u32_val);
102 }
106 103
107 // since we get 2 output for 1 calculation, the output length should be ARG1_LENGTH * 2 104 //
108 defmt::assert!(cnt == ARG1_LENGTH * 2); 105 // software calculation
106 //
109 107
110 let mut software_output_f64_buf = [0f64; ARG1_LENGTH * 2]; 108 let mut software_result_f64 = [0.0f64; OUTPUT_LENGTH];
111 109
112 // for software calc, if there is no ARG2 value, insert a 1.0 as value (the reset value for ARG2 in CORDIC) 110 let arg2 = utils::q1_31_to_f64(input_q1_31[1]);
113 let arg2_f64_ls = if arg2_f64_len == 0 { &[1.0] } else { arg2_f64_ls };
114 111
115 let software_inputs = arg1_f64_ls 112 for (&arg1, res) in input_q1_31
116 .iter() 113 .iter()
117 .zip( 114 .enumerate()
118 arg2_f64_ls 115 .filter_map(|(idx, val)| if idx != 1 { Some(val) } else { None })
119 .iter() 116 .zip(software_result_f64.chunks_mut(2))
120 .chain(core::iter::repeat(&arg2_f64_ls[arg2_f64_ls.len() - 1])), 117 {
121 ) 118 let arg1 = utils::q1_31_to_f64(arg1);
122 .zip(software_output_f64_buf.chunks_mut(2));
123
124 let software_start_point = embassy_time::Instant::now();
125 119
126 for ((arg1, arg2), res) in software_inputs {
127 let (raw_res1, raw_res2) = (arg1 * core::f64::consts::PI).sin_cos(); 120 let (raw_res1, raw_res2) = (arg1 * core::f64::consts::PI).sin_cos();
128
129 (res[0], res[1]) = (raw_res1 * arg2, raw_res2 * arg2); 121 (res[0], res[1]) = (raw_res1 * arg2, raw_res2 * arg2);
130 } 122 }
131 123
132 let software_end_point = embassy_time::Instant::now(); 124 //
125 // check result are the same
126 //
133 127
134 for (cordic_res, software_res) in cordic_output_f64_buf[..cnt] 128 for (cordic_res, software_res) in cordic_result_f64[..cnt0 + cnt1]
135 .chunks(2) 129 .chunks(2)
136 .zip(software_output_f64_buf.chunks(2)) 130 .zip(software_result_f64.chunks(2))
137 { 131 {
138 for (cord_res, soft_res) in cordic_res.iter().zip(software_res.iter()) { 132 for (cord_res, soft_res) in cordic_res.iter().zip(software_res.iter()) {
133 // 2.0.powi(-19) is the max residual error for Sin function, in q1.31 format, with 24 iterations (aka PRECISION = 6)
139 defmt::assert!((cord_res - soft_res).abs() <= 2.0.powi(-19)); 134 defmt::assert!((cord_res - soft_res).abs() <= 2.0.powi(-19));
140 } 135 }
141 } 136 }
142 137
143 // This comparison is just for fun. Since it not a equal compare:
144 // software use 64-bit floating point, but CORDIC use 32-bit fixed point.
145 defmt::trace!(
146 "calculate count: {}, Cordic time: {} us, software time: {} us",
147 ARG1_LENGTH,
148 (cordic_end_point - cordic_start_point).as_micros(),
149 (software_end_point - software_start_point).as_micros()
150 );
151
152 info!("Test OK"); 138 info!("Test OK");
153 cortex_m::asm::bkpt(); 139 cortex_m::asm::bkpt();
154} 140}