/*
 * Initial-value image for per-core ("thread-local") statics.
 *
 * Gathers every `.tdata` / `.tdata.*` input section into FLASH and records
 * its bounds. `__tdata_len` is an absolute symbol whose *value* is the image
 * size in bytes; code reads it by taking the symbol's address.
 */
SECTIONS {
    .tdata : ALIGN(4)
    {
        . = ALIGN(4);
        PROVIDE(__tdata_start = .);
        *(.tdata .tdata.*);
        /* Pad the end so the length is a whole number of words. */
        . = ALIGN(4);
        PROVIDE(__tdata_end = .);
    } > FLASH
    PROVIDE(__tdata_len = __tdata_end - __tdata_start);
} INSERT AFTER .data;

/*
 * Size the zero-initialised per-core statics, then reserve one RAM block per
 * core that is big enough for a copy of `.tdata` followed by `.tbss`.
 */
SECTIONS {
    /* Only used to measure the zero-initialised part (`__tbss_len`). */
    .tbss (NOLOAD) : ALIGN(4)
    {
        . = ALIGN(4);
        PROVIDE(__tbss_start = .);
        *(.tbss .tbss.*);
        *(.tcommon);
        . = ALIGN(4);
        PROVIDE(__tbss_end = .);
    } > RAM
    PROVIDE(__tbss_len = __tbss_end - __tbss_start);

    /*
     * Per-core storage: TLS_CORE_0 and TLS_CORE_1 each mark the start of a
     * block of `__tdata_len + __tbss_len` bytes.
     *
     * NOTE(review): this is a separate NOLOAD section placed after `.bss`, so
     * startup code that only clears `.bss` presumably leaves it untouched -
     * whoever populates these blocks must also zero the `.tbss` part. Confirm.
     */
    .tls_state (NOLOAD) : ALIGN(4) {
        PROVIDE(TLS_CORE_0 = ALIGN(4));
        . += __tdata_len + __tbss_len;
        PROVIDE(TLS_CORE_1 = ALIGN(4));
        . += __tdata_len + __tbss_len;
    } > RAM
} INSERT AFTER .bss;
["critical-section-impl"] diff --git a/rp2040-hal/examples/multicore_percore_data.rs b/rp2040-hal/examples/multicore_percore_data.rs new file mode 100644 index 000000000..c36968fc1 --- /dev/null +++ b/rp2040-hal/examples/multicore_percore_data.rs @@ -0,0 +1,166 @@ +//! # Multicore Blinking Example +//! +//! This application blinks two LEDs on GPIOs 2 and 3 at different rates (3Hz +//! and 4Hz respectively.) +//! +//! See the `Cargo.toml` file for Copyright and licence details. +#![no_std] +//#![cfg(feature = "thread_local")] +#![feature(thread_local)] +#![no_main] + +use core::cell::RefCell; + +use cortex_m::delay::Delay; + +use hal::clocks::Clock; +use hal::gpio::{DynPinId, FunctionSio, Pin, Pins, PullDown, SioOutput}; +use hal::multicore::{Multicore, Stack}; +use hal::sio::Sio; +// Ensure we halt the program on panic (if we don't mention this crate it won't +// be linked) +use panic_halt as _; + +// Alias for our HAL crate +use rp2040_hal as hal; + +// A shorter alias for the Peripheral Access Crate, which provides low-level +// register access +use hal::pac; + +// Some traits we need +use embedded_hal::digital::StatefulOutputPin; + +/// The linker will place this boot block at the start of our program image. We +/// need this to help the ROM bootloader get our code up and running. +/// Note: This boot block is not necessary when using a rp-hal based BSP +/// as the BSPs already perform this step. +#[link_section = ".boot2"] +#[used] +pub static BOOT2: [u8; 256] = rp2040_boot2::BOOT_LOADER_GENERIC_03H; + +/// External high-speed crystal on the Raspberry Pi Pico board is 12 MHz. Adjust +/// if your board has a different frequency +const XTAL_FREQ_HZ: u32 = 12_000_000u32; + +/// The frequency at which core 0 will blink its LED (Hz). +const CORE0_FREQ: u32 = 3; +/// The frequency at which core 1 will blink its LED (Hz). +const CORE1_FREQ: u32 = 4; +/// The delay between each toggle of core 0's LED (us). 
+const CORE0_DELAY: u32 = 1_000_000 / CORE0_FREQ; +/// The delay between each toggle of core 1's LED (us). +const CORE1_DELAY: u32 = 1_000_000 / CORE1_FREQ; + +/// Stack for core 1 +/// +/// Core 0 gets its stack via the normal route - any memory not used by static +/// values is reserved for stack and initialised by cortex-m-rt. +/// To get the same for Core 1, we would need to compile everything separately +/// and modify the linker file for both programs, and that's quite annoying. +/// So instead, core1.spawn takes a [usize] which gets used for the stack. +/// NOTE: We use the `Stack` struct here to ensure that it has 32-byte +/// alignment, which allows the stack guard to take up the least amount of +/// usable RAM. +static mut CORE1_STACK: Stack<4096> = Stack::new(); + +/// State for the blinker +struct BlinkState { + led: Pin, PullDown>, + delay: Delay, + delay_time: u32, +} + +/// Per core blinker state +#[thread_local] +static STATE: RefCell> = RefCell::new(None); + +/// Blink which ever LED with whatever delay, according to the per-core state. +fn blinker() -> ! { + let mut state = STATE.borrow_mut(); + let BlinkState { + led, + delay, + delay_time, + } = state.as_mut().unwrap(); + loop { + led.toggle().unwrap(); + delay.delay_us(*delay_time); + } +} + +/// Entry point to our bare-metal application. +/// +/// The `#[rp2040_hal::entry]` macro ensures the Cortex-M start-up code calls this function +/// as soon as all global variables and the spinlock are initialised. +#[rp2040_hal::entry] +fn main() -> ! 
{ + // Grab our singleton objects + let mut pac = pac::Peripherals::take().unwrap(); + let core = pac::CorePeripherals::take().unwrap(); + + // Set up the watchdog driver - needed by the clock setup code + let mut watchdog = hal::watchdog::Watchdog::new(pac.WATCHDOG); + + // Configure the clocks + let clocks = hal::clocks::init_clocks_and_plls( + XTAL_FREQ_HZ, + pac.XOSC, + pac.CLOCKS, + pac.PLL_SYS, + pac.PLL_USB, + &mut pac.RESETS, + &mut watchdog, + ) + .unwrap(); + + let sys_freq = clocks.system_clock.freq().to_Hz(); + + // Set up the GPIO pins + let mut sio = Sio::new(pac.SIO); + let pins = Pins::new( + pac.IO_BANK0, + pac.PADS_BANK0, + sio.gpio_bank0, + &mut pac.RESETS, + ); + let led1 = pins.gpio2.into_push_pull_output(); + let led2 = pins.gpio3.into_push_pull_output(); + + // Start up the second core to blink the second LED + let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo); + let cores = mc.cores(); + let core1 = &mut cores[1]; + core1 + .spawn(unsafe { &mut CORE1_STACK.mem }, move || { + // Get the second core's copy of the `CorePeripherals`, which are per-core. + // Unfortunately, `cortex-m` doesn't support this properly right now, + // so we have to use `steal`. + let core = unsafe { pac::CorePeripherals::steal() }; + // Set up the delay for the second core. + let delay = Delay::new(core.SYST, sys_freq); + + STATE.borrow_mut().replace(BlinkState { + led: led2.into_dyn_pin(), + delay, + delay_time: CORE1_DELAY, + }); + + // Blink the second LED. + blinker(); + }) + .unwrap(); + + // Set up the delay for the first core. + let delay = Delay::new(core.SYST, sys_freq); + + // Blink the first LED. + STATE.borrow_mut().replace(BlinkState { + led: led1.into_dyn_pin(), + delay, + delay_time: CORE0_DELAY, + }); + blinker(); +} + +// End of file diff --git a/rp2040-hal/src/multicore.rs b/rp2040-hal/src/multicore.rs index d1b018402..f543075a8 100644 --- a/rp2040-hal/src/multicore.rs +++ b/rp2040-hal/src/multicore.rs @@ -33,6 +33,47 @@ //! 
For inter-processor communications, see [`crate::sio::SioFifo`] and [`crate::sio::Spinlock0`] //! //! For a detailed example, see [examples/multicore_fifo_blink.rs](https://github.com/rp-rs/rp-hal/tree/main/rp2040-hal/examples/multicore_fifo_blink.rs) +//! +//! ## Per-core static data +//! +//! Both cores share the same memory, so a `static` variable will be accessible +//! and shared by both, requiring the same care as it would in a multi-threaded +//! program. +//! +//! With the `thread_local` feature enabled, this module supports the use of the +//! ([unstable](https://github.com/rust-lang/rust/issues/29594)) +//! `#[thread_local]` attribute to make these per-core variables. This allows +//! the same code to run on both cores but with its own core-specific static +//! state, such as maintaining program state, or for things like DMA buffers. +//! +//! For example: +//! ```rust,ignore +//! #![feature(thread_local)] +//! # use core::cell::RefCell; +//! +//! #[thread_local] +//! static MY_COUNTER: RefCell<usize> = RefCell::new(0); +//! +//! fn next_id() -> usize { +//! MY_COUNTER.replace_with(|c| *c + 1) +//! } +//! ``` +//! +//! Each core will get its own instance of the `MY_COUNTER` variable. Since +//! these are not shared, they do not need atomic operations to update. +//! +//! These core-local variables are initialized on program startup and retain +//! their value from there on, even between invocations of [`Core::spawn`]. +//! +//! Note that this requires some setup in the linker script to allocate space +//! for the static data. See memory.x for details. +//! +//! If the variables are zero-initialized then they will be reserved space in +//! the `.tbss` section in the executable, and then space in `.tls_state` (in RAM, after `.bss`) for each +//! core. Similarly, variables initialized with non-zero constants will be in +//! the executable's `.tdata` section, and have space reserved in `.tls_state`; the +//! initial values are copied at program startup. Note that this uses the +//!
/// Runtime support for `#[thread_local]` statics, giving each core its own
/// copy of every such variable.
///
/// Two pieces are provided:
///
/// * `__aeabi_read_tp`, which the compiler calls to locate the current
///   thread's storage. It reads the SIO `CPUID` register and returns
///   `TLS_CORE_0` on core 0 and `TLS_CORE_1` otherwise.
/// * a `cortex-m-rt` `pre_init` hook that initialises both per-core blocks
///   before any other start-up code runs.
///
/// The `TLS_CORE_*`, `__tdata_*` and `__tbss_len` symbols are provided by the
/// linker script (see `memory.x`).
#[cfg(all(target_arch = "arm", feature = "thread_local"))]
mod thread_local {
    use core::arch::global_asm;
    use core::ptr::{addr_of, addr_of_mut};

    extern "C" {
        // Start of each core's block of thread-local storage (in RAM).
        static mut TLS_CORE_0: u8;
        static mut TLS_CORE_1: u8;
        // Start of the initial-value image for thread-local data (in flash).
        static __tdata_start: u8;
        // Absolute linker symbols: the *address* of each symbol is a length in
        // bytes. They must never be dereferenced.
        static __tdata_len: u8;
        static __tbss_len: u8;
    }

    // Define `__aeabi_read_tp` called by the compiler to get access to
    // thread-local storage.
    //
    // Only `r0` and the condition flags are modified.
    //
    // NOTE(review): the ARM EABI TLS layout normally puts a two-word control
    // block at the thread pointer, with the first variable 8 bytes after the
    // value returned here. Confirm the offsets generated by the linker agree
    // with the block layout set up by `tls_pre_init_hook` and `memory.x`.
    global_asm! {
        ".pushsection .text.__aeabi_read_tp",
        ".align 4",
        ".p2align 4,,15",
        ".global __aeabi_read_tp",
        ".type __aeabi_read_tp,%function",

        "__aeabi_read_tp:",
        "    ldr r0, =0xd0000000",   // Load SIO CPUID addr
        "    ldr r0, [r0]",          // Load CPUID
        "    cmp r0, #0",            // Check core 0
        "    ldr r0, ={core_0}",     // Set TLS_CORE_0
        "    beq 1f",                // skip if done
        "    ldr r0, ={core_1}",     // Set TLS_CORE_1
        "1:  bx lr",

        ".popsection",
        core_0 = sym TLS_CORE_0,
        core_1 = sym TLS_CORE_1,
    }

    // Intercept __pre_init to hook into the startup code and initialise
    // TLS_CORE_[01]: the `.tdata` image is copied to the start of each block
    // and the `.tbss` part that follows it is zeroed.
    //
    // The per-core blocks live in a NOLOAD section outside `.bss`, so nothing
    // else clears them; without the explicit zeroing, zero-initialised
    // `#[thread_local]` statics would start with whatever was left in RAM.
    //
    // NB: Run as the very first thing, nothing has been initialized and memory
    // could be in arbitrary state, so we only deal with things via raw pointers.
    #[cortex_m_rt::pre_init]
    unsafe fn tls_pre_init_hook() {
        // The lengths are encoded as the addresses of the linker symbols.
        let tdata_len = addr_of!(__tdata_len) as usize;
        let tbss_len = addr_of!(__tbss_len) as usize;

        for dst in [addr_of_mut!(TLS_CORE_0), addr_of_mut!(TLS_CORE_1)] {
            // SAFETY: the linker script reserves `tdata_len + tbss_len` bytes
            // of RAM at each `TLS_CORE_n`; the source image is in flash, so the
            // two ranges cannot overlap.
            core::ptr::copy_nonoverlapping(addr_of!(__tdata_start), dst, tdata_len);
            // SAFETY: `dst + tdata_len .. dst + tdata_len + tbss_len` is the
            // remainder of the same reserved block.
            core::ptr::write_bytes(dst.add(tdata_len), 0, tbss_len);
        }
    }
}