1use aarch64_cpu::registers::{Readable, Writeable};
2use armv9a::regs::{CPTR_EL2, SMCR_EL2, SVCR, ZCR_EL1, ZCR_EL2};
3use armv9a::InMemoryRegister;
4use core::arch::asm;
5use core::array::from_fn;
6use lazy_static::lazy_static;
7use spin::mutex::Mutex;
8
9use super::Rec;
10use crate::config::NUM_OF_CPU;
11use crate::cpu::get_cpu_id;
12use crate::granule::GranuleState;
13use crate::realm::rd::Rd;
14use crate::rec::RecAuxIndex;
15use crate::rmi::error::Error;
16use crate::simd::{sme_en, SimdConfig, MAX_VQ};
17use crate::{get_granule, get_granule_if};
18
/// Per-REC SIMD bookkeeping, stored as `rec.context.simd`.
#[derive(Default, Debug)]
pub struct SimdContext {
    /// Cleared by `init_simd` and at the end of `save_state`. When it is
    /// false, `save_state` skips saving the realm registers entirely.
    /// Nothing in this file sets it to true — presumably the handler for the
    /// realm's first SIMD access does; verify against the caller.
    pub is_used: bool,
    /// Set by `save_state` once realm registers have been written to the aux
    /// granule. `restore_state_lazy` only reloads realm registers when set.
    pub is_saved: bool,

    /// SVE/SME settings copied from the owning realm's SIMD configuration by
    /// `init_simd`.
    pub cfg: SimdConfig,
}
27
/// Saved SIMD-related register state.
///
/// The same layout is used for the realm copy (placed in the REC's SIMD aux
/// granule by `init_simd`) and for the per-CPU `NS_SIMD` copy.
#[derive(Default, Debug)]
pub struct SimdRegister {
    /// Saved `CPTR_EL2` value.
    pub cptr_el2: u64,
    /// Saved `SVCR` value; only read or written when `sme_en()` is true.
    pub svcr: u64,
    /// Q registers plus FPSR/FPCR.
    pub fpu: FpuRegs,
    /// Z/P/FFR registers plus the ZCR values.
    pub sve: SveRegs,
}
38
39impl SimdRegister {
40 pub fn new() -> Self {
41 Default::default()
42 }
43}
44
// Marker impls with empty bodies. `init_simd` stores a `SimdRegister` in a
// granule via `new_uninit_with` and the state functions read it back through
// `content`/`content_mut`; these impls are presumably what those APIs
// require — confirm against the vmsa / safe_abstraction crates.
impl vmsa::guard::Content for SimdRegister {}
impl safe_abstraction::raw_ptr::RawPtr for SimdRegister {}
impl safe_abstraction::raw_ptr::SafetyChecked for SimdRegister {}
// NOTE(review): both checks below succeed unconditionally, so no runtime
// validation is performed for `SimdRegister`. Confirm that this matches the
// contract `SafetyAssured` expects from implementors.
impl safe_abstraction::raw_ptr::SafetyAssured for SimdRegister {
    /// Always reports the instance as initialized.
    fn is_initialized(&self) -> bool {
        true
    }

    /// Always reports ownership as verified.
    fn verify_ownership(&self) -> bool {
        true
    }
}
59
/// Number of 128-bit Q registers (q0..q31) saved by `save_fpu`.
const NUM_FPU_REGS: usize = 32;
/// Save area for the FP/AdvSIMD register file.
#[derive(Default, Debug)]
pub struct FpuRegs {
    /// q0..q31 in register order; `save_fpu`/`restore_fpu` access them as
    /// pairs at 32-byte steps from the start of this array.
    pub q: [u128; NUM_FPU_REGS],
    /// Value read from / written to FPSR by the `*_fpu_crsr` helpers.
    pub fpsr: u64,
    /// Value read from / written to FPCR by the `*_fpu_crsr` helpers.
    pub fpcr: u64,
}
69
lazy_static! {
    /// One `SimdRegister` save slot per CPU, indexed with `get_cpu_id()`.
    ///
    /// The state functions below park here the register values that were
    /// live before the realm's values are loaded (the "NS" side in this
    /// file's naming), and reload them in `save_state`.
    static ref NS_SIMD: [Mutex<SimdRegister>; NUM_OF_CPU] =
        from_fn(|_| Mutex::new(SimdRegister::default()));
}
74
/// Number of SVE vector registers (z0..z31).
const NUM_VECTOR_REGS: usize = 32;
/// Number of SVE predicate registers (p0..p15).
const NUM_PREDICATE_REGS: usize = 16;

/// Save area for the SVE register file, sized for the largest supported
/// vector length (`MAX_VQ` 128-bit quadwords per Z register).
///
/// `save_sve`/`restore_sve` treat `z` and `p` as flat byte buffers addressed
/// with `MUL VL` offsets, so the array nesting only determines the total
/// size, not where a given register lands.
#[derive(Default, Debug)]
pub struct SveRegs {
    /// Backing storage for z0..z31: `NUM_VECTOR_REGS * MAX_VQ` quadwords.
    pub z: [[u128; NUM_VECTOR_REGS]; MAX_VQ as usize],
    /// Backing storage for p0..p15: 16 bits of predicate per quadword of
    /// vector length.
    pub p: [[u16; NUM_PREDICATE_REGS]; MAX_VQ as usize],
    /// Backing storage for FFR, which `save_sve` stores via p0.
    pub ffr: [u16; MAX_VQ as usize],
    /// Saved `ZCR_EL2` value.
    pub zcr_el2: u64,
    /// Saved value of the register accessed as `ZCR_EL1` in this file.
    pub zcr_el12: u64,
}
90
91pub fn init_simd(rec: &mut Rec<'_>) -> Result<(), Error> {
95 let raw_ptr: *const Rd = rec.owner()? as *const Rd;
96 let rd: &Rd = unsafe { raw_ptr.as_ref().expect("REASON") }; let simd_cfg = rd.simd_config();
98
99 let mut zcr_el2: u64 = 0;
100 let mut svcr: u64 = 0;
101
102 rec.context.simd.is_used = false;
103 rec.context.simd.is_saved = false;
104 rec.context.simd.cfg.sve_en = simd_cfg.sve_en;
105 rec.context.simd.cfg.sve_vq = simd_cfg.sve_vq;
106 rec.context.simd.cfg.sme_en = simd_cfg.sme_en;
107
108 if simd_cfg.sve_en {
110 zcr_el2 = ZCR_EL2::LEN.val(simd_cfg.sve_vq).value;
111 }
112 if simd_cfg.sme_en {
113 svcr = 0;
114 }
115
116 let simd_aux = rec.aux(RecAuxIndex::SIMD as usize) as usize;
117 let mut simd_granule = get_granule_if!(simd_aux, GranuleState::RecAux)?;
118 debug!("RecAux granule for simd at 0x{:x}", simd_aux);
119 let mut simd_regs = simd_granule.new_uninit_with::<SimdRegister>(SimdRegister::new())?;
120 simd_regs.cptr_el2 =
124 (CPTR_EL2::TAM::SET + CPTR_EL2::TSM::SET + CPTR_EL2::TFP::SET + CPTR_EL2::TZ::SET).value;
125 simd_regs.sve.zcr_el2 = zcr_el2;
126 simd_regs.svcr = svcr;
127 Ok(())
128}
129
130#[allow(aarch64_softfloat_neon)]
140#[target_feature(enable = "neon")]
141unsafe fn save_fpu(fpu: &mut FpuRegs) {
142 let addr_q: u64 = fpu.q.as_ptr() as u64;
143 unsafe {
144 asm!(
145 "stp q0, q1, [{addr_q}]",
146 "stp q2, q3, [{addr_q}, #32]",
147 "stp q4, q5, [{addr_q}, #64]",
148 "stp q6, q7, [{addr_q}, #96]",
149 "stp q8, q9, [{addr_q}, #128]",
150 "stp q10, q11, [{addr_q}, #160]",
151 "stp q12, q13, [{addr_q}, #192]",
152 "stp q14, q15, [{addr_q}, #224]",
153 "stp q16, q17, [{addr_q}, #256]",
154 "stp q18, q19, [{addr_q}, #288]",
155 "stp q20, q21, [{addr_q}, #320]",
156 "stp q22, q23, [{addr_q}, #352]",
157 "stp q24, q25, [{addr_q}, #384]",
158 "stp q26, q27, [{addr_q}, #416]",
159 "stp q28, q29, [{addr_q}, #448]",
160 "stp q30, q31, [{addr_q}, #480]",
161 addr_q = in(reg) addr_q,
162 );
163 save_fpu_crsr(fpu);
164 }
165}
166
/// Reads FPSR and FPCR and stores them in `fpu.fpsr` / `fpu.fpcr`.
///
/// # Safety
///
/// Reads FP system registers, so the caller must ensure such accesses are
/// permitted at the current exception level.
// NOTE(review): the lint is presumably allowed because only inline asm
// touches FP state here — confirm.
#[allow(aarch64_softfloat_neon)]
#[target_feature(enable = "neon")]
unsafe fn save_fpu_crsr(fpu: &mut FpuRegs) {
    let fpsr: u64;
    let fpcr: u64;
    unsafe {
        asm!(
            "mrs {fpsr}, fpsr",
            "mrs {fpcr}, fpcr",
            fpsr = out(reg) fpsr,
            fpcr = out(reg) fpcr,
        );
    }
    fpu.fpsr = fpsr;
    fpu.fpcr = fpcr;
}
186
/// Loads q0..q31 and then FPSR/FPCR from `fpu`.
///
/// # Safety
///
/// Overwrites the whole FP/AdvSIMD register file. The caller must ensure
/// such accesses are permitted at the current exception level and that the
/// previous register contents are no longer needed.
// NOTE(review): the lint is presumably allowed because only inline asm
// touches the vector registers here — confirm.
#[allow(aarch64_softfloat_neon)]
#[target_feature(enable = "neon")]
pub unsafe fn restore_fpu(fpu: &FpuRegs) {
    let addr_q: u64 = fpu.q.as_ptr() as u64;
    unsafe {
        // Mirror image of `save_fpu`: register pairs at 32-byte steps.
        asm!(
            "ldp q0, q1, [{addr_q}]",
            "ldp q2, q3, [{addr_q}, #32]",
            "ldp q4, q5, [{addr_q}, #64]",
            "ldp q6, q7, [{addr_q}, #96]",
            "ldp q8, q9, [{addr_q}, #128]",
            "ldp q10, q11, [{addr_q}, #160]",
            "ldp q12, q13, [{addr_q}, #192]",
            "ldp q14, q15, [{addr_q}, #224]",
            "ldp q16, q17, [{addr_q}, #256]",
            "ldp q18, q19, [{addr_q}, #288]",
            "ldp q20, q21, [{addr_q}, #320]",
            "ldp q22, q23, [{addr_q}, #352]",
            "ldp q24, q25, [{addr_q}, #384]",
            "ldp q26, q27, [{addr_q}, #416]",
            "ldp q28, q29, [{addr_q}, #448]",
            "ldp q30, q31, [{addr_q}, #480]",
            addr_q = in(reg) addr_q,
        );
        restore_fpu_crsr(fpu);
    }
}
217
/// Writes `fpu.fpsr` and `fpu.fpcr` back to FPSR and FPCR.
///
/// # Safety
///
/// Writes FP system registers, so the caller must ensure such accesses are
/// permitted at the current exception level.
// NOTE(review): the lint is presumably allowed because only inline asm
// touches FP state here — confirm.
#[allow(aarch64_softfloat_neon)]
#[target_feature(enable = "neon")]
pub unsafe fn restore_fpu_crsr(fpu: &FpuRegs) {
    unsafe {
        asm!(
            "msr fpsr, {fpsr}",
            "msr fpcr, {fpcr}",
            fpsr = in(reg) fpu.fpsr,
            fpcr = in(reg) fpu.fpcr,
        );
    }
}
233
234#[allow(aarch64_softfloat_neon)]
238#[target_feature(enable = "sve")]
239unsafe fn save_sve(sve: &mut SveRegs, save_ffr: bool) {
240 let addr_z: u64 = sve.z.as_ptr() as u64;
241 let addr_p: u64 = sve.p.as_ptr() as u64;
242 unsafe {
243 asm!(
245 "str z0, [{addr_z}, #0, MUL VL]",
246 "str z1, [{addr_z}, #1, MUL VL]",
247 "str z2, [{addr_z}, #2, MUL VL]",
248 "str z3, [{addr_z}, #3, MUL VL]",
249 "str z4, [{addr_z}, #4, MUL VL]",
250 "str z5, [{addr_z}, #5, MUL VL]",
251 "str z6, [{addr_z}, #6, MUL VL]",
252 "str z7, [{addr_z}, #7, MUL VL]",
253 "str z8, [{addr_z}, #8, MUL VL]",
254 "str z9, [{addr_z}, #9, MUL VL]",
255 "str z10, [{addr_z}, #10, MUL VL]",
256 "str z11, [{addr_z}, #11, MUL VL]",
257 "str z12, [{addr_z}, #12, MUL VL]",
258 "str z13, [{addr_z}, #13, MUL VL]",
259 "str z14, [{addr_z}, #14, MUL VL]",
260 "str z15, [{addr_z}, #15, MUL VL]",
261 "str z16, [{addr_z}, #16, MUL VL]",
262 "str z17, [{addr_z}, #17, MUL VL]",
263 "str z18, [{addr_z}, #18, MUL VL]",
264 "str z19, [{addr_z}, #19, MUL VL]",
265 "str z20, [{addr_z}, #20, MUL VL]",
266 "str z21, [{addr_z}, #21, MUL VL]",
267 "str z22, [{addr_z}, #22, MUL VL]",
268 "str z23, [{addr_z}, #23, MUL VL]",
269 "str z24, [{addr_z}, #24, MUL VL]",
270 "str z25, [{addr_z}, #25, MUL VL]",
271 "str z26, [{addr_z}, #26, MUL VL]",
272 "str z27, [{addr_z}, #27, MUL VL]",
273 "str z28, [{addr_z}, #28, MUL VL]",
274 "str z29, [{addr_z}, #29, MUL VL]",
275 "str z30, [{addr_z}, #30, MUL VL]",
276 "str z31, [{addr_z}, #31, MUL VL]",
277 addr_z = in(reg) addr_z,
278 );
279 asm!(
281 "str p0, [{addr_p}, #0, MUL VL]",
282 "str p1, [{addr_p}, #1, MUL VL]",
283 "str p2, [{addr_p}, #2, MUL VL]",
284 "str p3, [{addr_p}, #3, MUL VL]",
285 "str p4, [{addr_p}, #4, MUL VL]",
286 "str p5, [{addr_p}, #5, MUL VL]",
287 "str p6, [{addr_p}, #6, MUL VL]",
288 "str p7, [{addr_p}, #7, MUL VL]",
289 "str p8, [{addr_p}, #8, MUL VL]",
290 "str p9, [{addr_p}, #9, MUL VL]",
291 "str p10, [{addr_p}, #10, MUL VL]",
292 "str p11, [{addr_p}, #11, MUL VL]",
293 "str p12, [{addr_p}, #12, MUL VL]",
294 "str p13, [{addr_p}, #13, MUL VL]",
295 "str p14, [{addr_p}, #14, MUL VL]",
296 "str p15, [{addr_p}, #15, MUL VL]",
297 addr_p = in(reg) addr_p,
298 );
299 if save_ffr {
300 let addr_ffr: u64 = core::ptr::addr_of!(sve.ffr) as u64;
301 asm!(
302 "rdffr p0.B",
303 "str p0, [{addr_ffr}]",
304 addr_ffr = in(reg) addr_ffr,
305 );
306 }
307 }
308}
309
/// Loads z0..z31 and p0..p15 from `sve`, plus FFR when `restore_ffr` is true.
///
/// Registers are read at `MUL VL` offsets, i.e. scaled by the vector length
/// in effect when this runs. FPSR/FPCR are not restored here; callers pair
/// this with `restore_fpu_crsr`.
///
/// # Safety
///
/// Overwrites the whole SVE register file. The caller must ensure SVE
/// accesses are permitted at the current exception level and that the
/// previous register contents are no longer needed.
// NOTE(review): the lint is presumably allowed because only inline asm
// touches the vector registers here — confirm.
#[allow(aarch64_softfloat_neon)]
#[inline(never)]
#[target_feature(enable = "sve")]
pub unsafe fn restore_sve(sve: &SveRegs, restore_ffr: bool) {
    let addr_z: u64 = sve.z.as_ptr() as u64;
    let addr_p: u64 = sve.p.as_ptr() as u64;
    unsafe {
        asm!(
            "ldr z0, [{addr_z}, #0, MUL VL]",
            "ldr z1, [{addr_z}, #1, MUL VL]",
            "ldr z2, [{addr_z}, #2, MUL VL]",
            "ldr z3, [{addr_z}, #3, MUL VL]",
            "ldr z4, [{addr_z}, #4, MUL VL]",
            "ldr z5, [{addr_z}, #5, MUL VL]",
            "ldr z6, [{addr_z}, #6, MUL VL]",
            "ldr z7, [{addr_z}, #7, MUL VL]",
            "ldr z8, [{addr_z}, #8, MUL VL]",
            "ldr z9, [{addr_z}, #9, MUL VL]",
            "ldr z10, [{addr_z}, #10, MUL VL]",
            "ldr z11, [{addr_z}, #11, MUL VL]",
            "ldr z12, [{addr_z}, #12, MUL VL]",
            "ldr z13, [{addr_z}, #13, MUL VL]",
            "ldr z14, [{addr_z}, #14, MUL VL]",
            "ldr z15, [{addr_z}, #15, MUL VL]",
            "ldr z16, [{addr_z}, #16, MUL VL]",
            "ldr z17, [{addr_z}, #17, MUL VL]",
            "ldr z18, [{addr_z}, #18, MUL VL]",
            "ldr z19, [{addr_z}, #19, MUL VL]",
            "ldr z20, [{addr_z}, #20, MUL VL]",
            "ldr z21, [{addr_z}, #21, MUL VL]",
            "ldr z22, [{addr_z}, #22, MUL VL]",
            "ldr z23, [{addr_z}, #23, MUL VL]",
            "ldr z24, [{addr_z}, #24, MUL VL]",
            "ldr z25, [{addr_z}, #25, MUL VL]",
            "ldr z26, [{addr_z}, #26, MUL VL]",
            "ldr z27, [{addr_z}, #27, MUL VL]",
            "ldr z28, [{addr_z}, #28, MUL VL]",
            "ldr z29, [{addr_z}, #29, MUL VL]",
            "ldr z30, [{addr_z}, #30, MUL VL]",
            "ldr z31, [{addr_z}, #31, MUL VL]",
            addr_z = in(reg) addr_z,
        );

        // FFR is written via p0 as scratch, so this has to happen before the
        // predicate registers are reloaded below (which puts the real p0 back).
        if restore_ffr {
            let addr_ffr: u64 = core::ptr::addr_of!(sve.ffr) as u64;
            asm!(
                "ldr p0, [{addr_ffr}]",
                "wrffr p0.B",
                addr_ffr = in(reg) addr_ffr,
            );
        }
        asm!(
            "ldr p0, [{addr_p}, #0, MUL VL]",
            "ldr p1, [{addr_p}, #1, MUL VL]",
            "ldr p2, [{addr_p}, #2, MUL VL]",
            "ldr p3, [{addr_p}, #3, MUL VL]",
            "ldr p4, [{addr_p}, #4, MUL VL]",
            "ldr p5, [{addr_p}, #5, MUL VL]",
            "ldr p6, [{addr_p}, #6, MUL VL]",
            "ldr p7, [{addr_p}, #7, MUL VL]",
            "ldr p8, [{addr_p}, #8, MUL VL]",
            "ldr p9, [{addr_p}, #9, MUL VL]",
            "ldr p10, [{addr_p}, #10, MUL VL]",
            "ldr p11, [{addr_p}, #11, MUL VL]",
            "ldr p12, [{addr_p}, #12, MUL VL]",
            "ldr p13, [{addr_p}, #13, MUL VL]",
            "ldr p14, [{addr_p}, #14, MUL VL]",
            "ldr p15, [{addr_p}, #15, MUL VL]",
            addr_p = in(reg) addr_p,
        );
    }
}
387
388fn preserve_ffr(svcr: u64) -> bool {
389 let svcr: InMemoryRegister<u64, SVCR::Register> = InMemoryRegister::new(svcr);
390 let mut rtn = true;
391
392 let is_streaming = sme_en() && svcr.read(SVCR::SM) != 0;
393 if is_streaming {
394 rtn = SMCR_EL2.read(SMCR_EL2::FA64) != 0;
395 }
396 rtn
397}
398
399pub fn restore_state_lazy(rec: &Rec<'_>) {
403 let rec_simd_ctxt = &rec.context.simd;
404 let simd_aux = rec.aux(RecAuxIndex::SIMD as usize) as usize;
405 let simd_granule = match get_granule_if!(simd_aux, GranuleState::RecAux) {
406 Ok(guard) => guard,
407 Err(_e) => {
408 error!("Unable to get RecAux granule at 0x{:x}", simd_aux);
409 return;
410 }
411 };
412 let rec_simd = simd_granule.content::<SimdRegister>().unwrap();
413 let mut ns_simd = NS_SIMD[get_cpu_id()].lock();
414
415 CPTR_EL2.write(CPTR_EL2::TAM::SET);
417 if rec_simd_ctxt.cfg.sve_en {
418 ns_simd.sve.zcr_el2 = ZCR_EL2.get();
419 ns_simd.sve.zcr_el12 = ZCR_EL1.get();
420 ZCR_EL2.set(rec_simd.sve.zcr_el2);
422 ZCR_EL1.set(rec_simd.sve.zcr_el12);
423 #[cfg(not(any(test, miri, fuzzing)))]
424 unsafe {
425 let save_ffr = preserve_ffr(ns_simd.svcr);
427 save_sve(&mut ns_simd.sve, save_ffr);
428 save_fpu_crsr(&mut ns_simd.fpu);
429 if sme_en() {
430 SVCR.set(rec_simd.svcr);
431 }
432
433 if rec_simd_ctxt.is_saved {
434 let restore_ffr = true; restore_sve(&rec_simd.sve, restore_ffr);
436 restore_fpu_crsr(&rec_simd.fpu);
437 }
438 }
439 } else {
440 unsafe {
441 save_fpu(&mut ns_simd.fpu);
442 if rec_simd_ctxt.is_saved {
443 restore_fpu(&rec_simd.fpu);
444 }
445 }
446 }
447}
448
449pub fn restore_state(rec: &Rec<'_>) {
450 let simd_aux = rec.aux(RecAuxIndex::SIMD as usize) as usize;
451 let simd_granule = match get_granule_if!(simd_aux, GranuleState::RecAux) {
452 Ok(guard) => guard,
453 Err(_e) => {
454 error!("Unable to get RecAux granule at 0x{:x}", simd_aux);
455 return;
456 }
457 };
458 let rec_simd = simd_granule.content::<SimdRegister>().unwrap();
459 let mut ns_simd = NS_SIMD[get_cpu_id()].lock();
460
461 CPTR_EL2.write(CPTR_EL2::TAM::SET);
463
464 if sme_en() {
468 ns_simd.svcr = SVCR.get();
469 }
474 ns_simd.cptr_el2 = CPTR_EL2.get();
475 CPTR_EL2.set(rec_simd.cptr_el2);
476}
477
478pub fn save_state(rec: &mut Rec<'_>) {
479 let simd_aux = rec.aux(RecAuxIndex::SIMD as usize) as usize;
480 let rec_simd_ctxt = &mut rec.context.simd;
481 let mut simd_granule = match get_granule_if!(simd_aux, GranuleState::RecAux) {
482 Ok(guard) => guard,
483 Err(_e) => {
484 error!("Unable to get RecAux granule at 0x{:x}", simd_aux);
485 return;
486 }
487 };
488 let mut rec_simd = simd_granule.content_mut::<SimdRegister>().unwrap();
489 let ns_simd = NS_SIMD[get_cpu_id()].lock();
490
491 rec_simd.cptr_el2 =
492 (CPTR_EL2::TAM::SET + CPTR_EL2::TSM::SET + CPTR_EL2::TFP::SET + CPTR_EL2::TZ::SET).value;
493 if !rec_simd_ctxt.is_used {
494 CPTR_EL2.set(ns_simd.cptr_el2);
495 if sme_en() {
496 SVCR.set(ns_simd.svcr);
497 }
498 return;
499 }
500 CPTR_EL2.write(CPTR_EL2::TAM::SET);
502 if rec_simd_ctxt.cfg.sve_en {
505 rec_simd.sve.zcr_el2 = ZCR_EL2.get();
506 rec_simd.sve.zcr_el12 = ZCR_EL1.get();
507 unsafe {
508 let save_ffr = true; save_sve(&mut rec_simd.sve, save_ffr);
510 save_fpu_crsr(&mut rec_simd.fpu);
511 if sme_en() {
514 SVCR.set(ns_simd.svcr);
515 }
516 let restore_ffr = preserve_ffr(ns_simd.svcr);
518 restore_sve(&ns_simd.sve, restore_ffr);
519 restore_fpu_crsr(&ns_simd.fpu);
520 }
521 ZCR_EL2.set(ns_simd.sve.zcr_el2);
522 ZCR_EL1.set(ns_simd.sve.zcr_el12);
523 } else {
524 unsafe {
526 save_fpu(&mut rec_simd.fpu);
527 restore_fpu(&ns_simd.fpu);
528 }
529 }
530 rec_simd_ctxt.is_used = false;
533 rec_simd_ctxt.is_saved = true;
534 CPTR_EL2.set(ns_simd.cptr_el2);
535}