aboutsummaryrefslogtreecommitdiff
path: root/embassy-usb-synopsys-otg/src
diff options
context:
space:
mode:
author: Matt Johnston <[email protected]> 2025-07-11 17:38:42 +0800
committer: Matt Johnston <[email protected]> 2025-07-15 16:40:37 +0800
commit: e2ceb2b1f7cd0fd7778b53aaf8ba1caa71b2f7f5 (patch)
tree: 53adf1ef9d2086d7500154421ad2691100cf7038 /embassy-usb-synopsys-otg/src
parent: f53b6649dd42918f7d5228d06821ac5b95e33d38 (diff)
otg: Improve IN write performance
`chunks_exact()` can be handled by the compiler more efficiently: the previous code made a memcpy call for each 4-byte chunk slice. Hoisting the FIFO register lookup out of the loop avoids recalculating the pointer on each iteration. In my benchmark, throughput rises from ~13 MB/s to ~25 MB/s with this change (opt-level = 3); with opt-level = "z" it goes from 9 MB/s to 18 MB/s. The benchmark ran on an STM32H7S3L8 with a 600 MHz clock, 512-byte bulk writes, and data in DTCM. The benchmark does not measure USB writes alone; it also includes some unrelated memcpys for packet construction.
Diffstat (limited to 'embassy-usb-synopsys-otg/src')
-rw-r--r-- embassy-usb-synopsys-otg/src/lib.rs 19
1 file changed, 16 insertions, 3 deletions
diff --git a/embassy-usb-synopsys-otg/src/lib.rs b/embassy-usb-synopsys-otg/src/lib.rs
index fc4428b54..3f6531813 100644
--- a/embassy-usb-synopsys-otg/src/lib.rs
+++ b/embassy-usb-synopsys-otg/src/lib.rs
@@ -1210,10 +1210,23 @@ impl<'d> embassy_usb_driver::EndpointIn for Endpoint<'d, In> {
1210 }); 1210 });
1211 1211
1212 // Write data to FIFO 1212 // Write data to FIFO
1213 for chunk in buf.chunks(4) { 1213 let chunks = buf.chunks_exact(4);
1214 // Stash the last partial chunk
1215 let rem = chunks.remainder();
1216 let last_chunk = (!rem.is_empty()).then(|| {
1214 let mut tmp = [0u8; 4]; 1217 let mut tmp = [0u8; 4];
1215 tmp[0..chunk.len()].copy_from_slice(chunk); 1218 tmp[0..rem.len()].copy_from_slice(rem);
1216 self.regs.fifo(index).write_value(regs::Fifo(u32::from_ne_bytes(tmp))); 1219 u32::from_ne_bytes(tmp)
1220 });
1221
1222 let fifo = self.regs.fifo(index);
1223 for chunk in chunks {
1224 let val = u32::from_ne_bytes(chunk.try_into().unwrap());
1225 fifo.write_value(regs::Fifo(val));
1226 }
1227 // Write any last chunk
1228 if let Some(val) = last_chunk {
1229 fifo.write_value(regs::Fifo(val));
1217 } 1230 }
1218 }); 1231 });
1219 1232