diff options
| author | Matt Johnston <[email protected]> | 2025-07-11 17:38:42 +0800 |
|---|---|---|
| committer | Matt Johnston <[email protected]> | 2025-07-15 16:40:37 +0800 |
| commit | e2ceb2b1f7cd0fd7778b53aaf8ba1caa71b2f7f5 (patch) | |
| tree | 53adf1ef9d2086d7500154421ad2691100cf7038 /embassy-usb-synopsys-otg/src | |
| parent | f53b6649dd42918f7d5228d06821ac5b95e33d38 (diff) | |
otg: Improve IN write performance
chunks_exact() can be handled by the compiler more efficiently.
The previous code was making a memcpy call for each 4-byte chunk slice.
Hoisting the fifo out of the loop avoids recalculating the pointer each
time.
In my benchmark I see a jump from ~13 MB/s to ~25 MB/s after
this change (opt-level = 3). With opt-level = "z" it goes from 9 MB/s to 18 MB/s.
The benchmark was run on an STM32H7S3L8 with a 600 MHz clock, 512-byte bulk
writes, and data in DTCM. The benchmark isn't just USB writes; it also has some
unrelated memcpys for packet construction.
Diffstat (limited to 'embassy-usb-synopsys-otg/src')
| -rw-r--r-- | embassy-usb-synopsys-otg/src/lib.rs | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/embassy-usb-synopsys-otg/src/lib.rs b/embassy-usb-synopsys-otg/src/lib.rs index fc4428b54..3f6531813 100644 --- a/embassy-usb-synopsys-otg/src/lib.rs +++ b/embassy-usb-synopsys-otg/src/lib.rs | |||
| @@ -1210,10 +1210,23 @@ impl<'d> embassy_usb_driver::EndpointIn for Endpoint<'d, In> { | |||
| 1210 | }); | 1210 | }); |
| 1211 | 1211 | ||
| 1212 | // Write data to FIFO | 1212 | // Write data to FIFO |
| 1213 | for chunk in buf.chunks(4) { | 1213 | let chunks = buf.chunks_exact(4); |
| 1214 | // Stash the last partial chunk | ||
| 1215 | let rem = chunks.remainder(); | ||
| 1216 | let last_chunk = (!rem.is_empty()).then(|| { | ||
| 1214 | let mut tmp = [0u8; 4]; | 1217 | let mut tmp = [0u8; 4]; |
| 1215 | tmp[0..chunk.len()].copy_from_slice(chunk); | 1218 | tmp[0..rem.len()].copy_from_slice(rem); |
| 1216 | self.regs.fifo(index).write_value(regs::Fifo(u32::from_ne_bytes(tmp))); | 1219 | u32::from_ne_bytes(tmp) |
| 1220 | }); | ||
| 1221 | |||
| 1222 | let fifo = self.regs.fifo(index); | ||
| 1223 | for chunk in chunks { | ||
| 1224 | let val = u32::from_ne_bytes(chunk.try_into().unwrap()); | ||
| 1225 | fifo.write_value(regs::Fifo(val)); | ||
| 1226 | } | ||
| 1227 | // Write any last chunk | ||
| 1228 | if let Some(val) = last_chunk { | ||
| 1229 | fifo.write_value(regs::Fifo(val)); | ||
| 1217 | } | 1230 | } |
| 1218 | }); | 1231 | }); |
| 1219 | 1232 | ||
