diff --git a/.gitignore b/.gitignore index ef360e22..0691f2db 100644 --- a/.gitignore +++ b/.gitignore @@ -96,6 +96,7 @@ scripts/parallels/breenix-efi.hds/ # Debug capture files serial-capture*.txt xhci_ftrace*.txt +.factory-runs/ logs/linux-probe-cpu0/ logs/breenix-parallels-cpu0/ .claude/worktrees/ diff --git a/docs/planning/ARM64_CPU0_SMP_INVESTIGATION.md b/docs/planning/ARM64_CPU0_SMP_INVESTIGATION.md index 4ca0f622..79e0c354 100644 --- a/docs/planning/ARM64_CPU0_SMP_INVESTIGATION.md +++ b/docs/planning/ARM64_CPU0_SMP_INVESTIGATION.md @@ -2867,3 +2867,122 @@ non-garbled per-CPU audit of `ICC_SRE_EL1`, `ICC_CTLR_EL1`, `ICC_PMR_EL1`, `ICC_IGRPEN1_EL1`, MPIDR affinity, and the selected GICR frame, then compare the redistributor wake/config state for CPUs that later report SPI34 stuck pending+active. + +## 2026-04-16 F13: Per-CPU GIC Audit and Idle-Scan Breadcrumbs + +Branch: `diagnostic/f13-gic-audit` + +F13 extended the GICv3 CPU-interface audit and added three AHCI ring +breadcrumbs around the idle-CPU scan in `isr_unblock_for_io()`. The per-CPU +audit values are captured at each CPU's ICC init and emitted from CPU0 after SMP +bring-up to avoid secondary boot raw-UART bytes corrupting the audit prefix. + +### Linux expected values + +| Field | Linux reference | Linux expected | F13 Breenix observed | Divergence | +| --- | --- | --- | --- | --- | +| `ICC_SRE_EL1` | `/tmp/linux-v6.8/include/linux/irqchip/arm-gic-v3.h:644-656`; `/tmp/linux-v6.8/drivers/irqchip/irq-gic-v3.c:1147-1155` | SRE bit set | `0x7` on CPUs 0-7 | No. SRE, DFB, and DIB are set. | +| `ICC_CTLR_EL1` | `/tmp/linux-v6.8/drivers/irqchip/irq-gic-v3.c:1188-1193` | EOImode drop bit set | `0x40402` on CPUs 0-7 | No for EOImode. Extra implementation bits are present. | +| `ICC_PMR_EL1` | `/tmp/linux-v6.8/drivers/irqchip/irq-gic-v3.c:1161-1164` | `DEFAULT_PMR_VALUE` = `0xf0` | `0xf8` on CPUs 0-7 | Yes. All CPUs use a stricter mask than Linux default. | +| `ICC_IGRPEN1_EL1` | `/tmp/linux-v6.8/drivers/irqchip/irq-gic-v3.c:1231-1233` | `1` | `0x1` on CPUs 0-7 | No. | +| Redistributor lookup | `/tmp/linux-v6.8/drivers/irqchip/irq-gic-v3.c:978-1011`, `:1017-1045`, `:1274-1299` | Iterate GICR frames and match `GICR_TYPER[63:32]` to MPIDR affinity before enabling the redistributor | F13 diagnostic reads by `target_cpu * 0x20000`; all dumped `GICR_TYPER` values are `0x0` | Yes or diagnostic-map invalid. Linux does not assume logical CPU ID equals redist frame index without an affinity match. | + +Linux register offsets used for the GICR dump are from +`/tmp/linux-v6.8/include/linux/irqchip/arm-gic-v3.h:114-125`; WAKER bit meanings +are at `:148-149`. + +### Validation sweep + +Artifacts: + +```text +logs/breenix-parallels-cpu0/f13-gic-audit/run{1..5}/ +``` + +| Run | AHCI timeout | `gic_cpu_audit_lines` | `gicr_waker_lines` | `scan_start` | `scan_cpu` | `scan_done` | Last emitted scan/unblock site | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| run1 | 0 | 8 | 0 | 0 | 0 | 0 | none | +| run2 | 0 | 8 | 0 | 0 | 0 | 0 | none | +| run3 | 2 | 8 | 16 | 0 | 0 | 0 | none in dumped ring; failure was SGI-side only | +| run4 | 2 | 8 | 16 | 1 | 2 | 0 | `UNBLOCK_SCAN_CPU` / `UNBLOCK_PER_SGI` | +| run5 | 2 | 8 | 16 | 0 | 0 | 1 | `UNBLOCK_SCAN_DONE` / `UNBLOCK_AFTER_CPU` | + +The sweep is still FAIL: 2/5 runs had no AHCI timeout, 3/5 produced AHCI +timeouts. The new audit coverage criterion is met: every run produced eight +`[GIC_CPU_AUDIT]` rows. + +### GIC CPU audit rows + +Representative rows from run4: + +```text +[GIC_CPU_AUDIT] cpu=0 mpidr=0x80000000 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +[GIC_CPU_AUDIT] cpu=1 mpidr=0x80000001 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +[GIC_CPU_AUDIT] cpu=2 mpidr=0x80000002 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +T3[GIC_CPU_AUDIT] cpu=3 mpidr=0x80000003 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +[GIC_CPU_AUDIT] cpu=4 mpidr=0x80000004 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +[GIC_CPU_AUDIT] cpu=5 mpidr=0x80000005 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +[GIC_CPU_AUDIT] cpu=6 mpidr=0x80000006 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +T4[GIC_CPU_AUDIT] cpu=7 mpidr=0x80000007 sre=0x7 ctlr=0x40402 pmr=0xf8 igrpen1=0x1 mismatch=1 +``` + +The `T3`/`T4` prefixes are raw timer breadcrumbs interleaving before the locked +serial line; the `[GIC_CPU_AUDIT]` payload itself is intact. The mismatch flag is +set on every CPU because `ICC_PMR_EL1` is `0xf8`, not Linux's `0xf0`. + +### GICR state at timeout + +Run4's stuck unblock selected target CPU 1 immediately before entering +`send_sgi()`: + +```text +[AHCI_RING] nsec=2388322625 site=UNBLOCK_PER_SGI cpu=0 port=0 IS=0x0 CI=0x0 SACT=0x0 SERR=0x0 waiter_tid=11 slot_mask=0x1 token=0 wake_success=0 seq=5749 +[AHCI_RING] nsec=2388321750 site=UNBLOCK_SCAN_CPU cpu=0 port=0 IS=0x0 CI=0x0 SACT=0x0 SERR=0x0 waiter_tid=11 slot_mask=0x1 token=0 wake_success=0 seq=5748 +[AHCI_RING] nsec=2388320875 site=UNBLOCK_SCAN_CPU cpu=0 port=0 IS=0x0 CI=0x0 SACT=0x0 SERR=0x0 waiter_tid=11 slot_mask=0x0 token=0 wake_success=0 seq=5747 +[AHCI_RING] nsec=2388319166 site=UNBLOCK_SCAN_START cpu=0 port=0 IS=0x0 CI=0x0 SACT=0x0 SERR=0x0 waiter_tid=11 slot_mask=0x0 token=0 wake_success=0 seq=5745 +``` + +The target CPU 1 GICR dump from the same timeout: + +```text +[GICR_STATE] cpu=1 waker=0x0 ctlr=0x0 typer=0x0 syncr=0x0 +``` + +All target-history CPUs dumped in runs 3-5 had the same shape: +`waker=0x0 ctlr=0x0 typer=0x0 syncr=0x0`. `WAKER=0` means +`ProcessorSleep=0` and `ChildrenAsleep=0`, so the dumped frame does not look +asleep. The concerning value is `GICR_TYPER=0x0`: Linux expects redistributor +selection to be validated by matching `GICR_TYPER[63:32]` to CPU affinity. F13's +dump therefore does not prove the redistributor for CPU 1 is healthy; it proves +the current logical-CPU-to-GICR-frame diagnostic map is suspect. + +### Interpretation + +F13 did not reproduce a CPU-specific SRE/CTLR failure. Every captured CPU has +SRE enabled, EOImode enabled, and Group1 enabled. The only ICC divergence is +global, not CPU-specific: PMR is `0xf8` on every CPU versus Linux's `0xf0`. + +The timeout corridor is not purely "before the scan" anymore. Run4 shows the +wake path entered the scan, inspected CPU 0 and CPU 1, emitted +`UNBLOCK_PER_SGI` for target CPU 1, and did not emit `UNBLOCK_SCAN_DONE`. That +narrows this instance to the scan-to-SGI handoff, most likely between +`UNBLOCK_PER_SGI` and the first `SGI_ENTRY` breadcrumb inside `send_sgi()`. + +Verdict: not (a) a specific CPU with broken SRE/CTLR. Not proven (b) a sleeping +redistributor, because WAKER is clear, but GICR selection is not trustworthy +while `GICR_TYPER` reads as zero for every target frame. The current best answer +is (c) still inside the scan/SGI handoff logic, with a parallel redistributor +mapping defect to resolve before trusting target-state dumps. + +### F14 recommendation + +F14 should do two targeted things: + +1. Add Linux-style redistributor discovery: iterate GICR frames, read + `GICR_TYPER`, match affinity bits against MPIDR, and store a per-CPU RD base. + Use that map for `dump_gicr_state_for_cpu()` first; only change live GICR init + semantics after the diagnostic map proves the current frame index is wrong. +2. Add two more ring sites around the call in `isr_unblock_for_io()`: + `UNBLOCK_BEFORE_SEND_SGI` and `UNBLOCK_AFTER_SEND_SGI`. Run4's latest + sequence stops at `UNBLOCK_PER_SGI`, so F14 needs to distinguish call-entry + failure from an early `send_sgi()` body failure. diff --git a/kernel/src/arch_impl/aarch64/gic.rs b/kernel/src/arch_impl/aarch64/gic.rs index 5a2f2c3f..255527ee 100644 --- a/kernel/src/arch_impl/aarch64/gic.rs +++ b/kernel/src/arch_impl/aarch64/gic.rs @@ -18,7 +18,7 @@ #![allow(dead_code)] use crate::arch_impl::traits::InterruptController; -use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU8, Ordering}; +use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicU8, Ordering}; // ============================================================================= // Fault-tolerant MMIO probe support @@ -907,7 +907,11 @@ pub fn dump_stuck_state_for_spi(intid: u32) { raw_uart_str(" bits="); raw_uart_hex(icfgr_bits as u64); raw_uart_str(" trigger="); - raw_uart_str(if (icfgr_bits & 0b10) != 0 { "edge" } else { "level" }); + raw_uart_str(if (icfgr_bits & 0b10) != 0 { + "edge" + } else { + "level" + }); raw_uart_str("\n"); raw_dump_prefix(intid); @@ -999,6 +1003,12 @@ pub fn dump_stuck_state_for_spi(intid: u32) { ); if intid == 34 { + let mut sgi_targets = [0u8; 8]; + let target_count = crate::drivers::ahci::collect_recent_sgi_targets(&mut sgi_targets); + for target in sgi_targets.iter().take(target_count) { + dump_gicr_state_for_cpu(*target as usize); + } + raw_dump_prefix(intid); raw_uart_str("AHCI_PORT0_IS="); if let Some(port0_is) = crate::drivers::ahci::port0_is_snapshot() { @@ -1043,19 +1053,13 @@ pub fn send_sgi(sgi_id: u8, target_cpu: u8) { // Bits 27:24 = INTID (SGI number) // For simple SMP (CPUs 0-7 in same affinity group): let target_list = 1u64 << (target_cpu as u64); - trace_sgi_boundary( - crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_MPIDR, - target_cpu, - ); + trace_sgi_boundary(crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_MPIDR, target_cpu); let sgir = ((sgi_id as u64) << 24) | target_list; trace_sgi_boundary( crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_COMPOSE, target_cpu, ); - trace_sgi_boundary( - crate::drivers::ahci::AHCI_TRACE_SGI_BEFORE_MSR, - target_cpu, - ); + trace_sgi_boundary(crate::drivers::ahci::AHCI_TRACE_SGI_BEFORE_MSR, target_cpu); unsafe { core::arch::asm!("msr icc_sgi1r_el1, {}", in(reg) sgir, options(nomem, nostack)); } @@ -1067,19 +1071,13 @@ pub fn send_sgi(sgi_id: u8, target_cpu: u8) { } else { // GICv2: Write GICD_SGIR let target_mask = 1u32 << (target_cpu as u32); - trace_sgi_boundary( - crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_MPIDR, - target_cpu, - ); + trace_sgi_boundary(crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_MPIDR, target_cpu); let sgir = (target_mask << 16) | (sgi_id as u32); trace_sgi_boundary( crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_COMPOSE, target_cpu, ); - trace_sgi_boundary( - crate::drivers::ahci::AHCI_TRACE_SGI_BEFORE_MSR, - target_cpu, - ); + trace_sgi_boundary(crate::drivers::ahci::AHCI_TRACE_SGI_BEFORE_MSR, target_cpu); gicd_write(0xF00, sgir); trace_sgi_boundary(crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_MSR, target_cpu); trace_sgi_boundary(crate::drivers::ahci::AHCI_TRACE_SGI_AFTER_ISB, target_cpu); @@ -1089,18 +1087,7 @@ pub fn send_sgi(sgi_id: u8, target_cpu: u8) { #[inline(always)] fn trace_sgi_boundary(site: u32, target_cpu: u8) { - crate::drivers::ahci::push_ahci_event( - site, - 0, - 0, - 0, - 0, - 0, - 0, - target_cpu as u32, - 0, - false, - ); + crate::drivers::ahci::push_ahci_event(site, 0, 0, 0, 0, 0, 0, target_cpu as u32, 0, false); } /// Check if GIC is initialized @@ -1267,6 +1254,84 @@ const GICR_SGI_OFFSET: usize = 0x1_0000; // SGI_base is second 64KB frame const GICR_CTLR: usize = 0x000; const GICR_WAKER: usize = 0x014; const GICR_TYPER: usize = 0x008; +const GICR_SYNCR: usize = 0x0C0; + +const LINUX_EXPECTED_SRE_ENABLED: u64 = 1; +const LINUX_EXPECTED_CTLR_EOIMODE: u64 = 1 << 1; +const LINUX_EXPECTED_PMR: u64 = 0xF0; +const LINUX_EXPECTED_IGRPEN1_ENABLED: u64 = 1; +const GIC_CPU_AUDIT_SLOTS: usize = 8; + +static GIC_CPU_AUDIT_VALID: [AtomicBool; GIC_CPU_AUDIT_SLOTS] = [ + AtomicBool::new(false), + AtomicBool::new(false), + AtomicBool::new(false), + AtomicBool::new(false), + AtomicBool::new(false), + AtomicBool::new(false), + AtomicBool::new(false), + AtomicBool::new(false), +]; +static GIC_CPU_AUDIT_MPIDR: [AtomicU64; GIC_CPU_AUDIT_SLOTS] = [ + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), +]; +static GIC_CPU_AUDIT_SRE: [AtomicU64; GIC_CPU_AUDIT_SLOTS] = [ + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), +]; +static GIC_CPU_AUDIT_CTLR: [AtomicU64; GIC_CPU_AUDIT_SLOTS] = [ + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), +]; +static GIC_CPU_AUDIT_PMR: [AtomicU64; GIC_CPU_AUDIT_SLOTS] = [ + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), +]; +static GIC_CPU_AUDIT_IGRPEN1: [AtomicU64; GIC_CPU_AUDIT_SLOTS] = [ + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), + AtomicU64::new(0), +]; +static GIC_CPU_AUDIT_MISMATCH: [AtomicU8; GIC_CPU_AUDIT_SLOTS] = [ + AtomicU8::new(0), + AtomicU8::new(0), + AtomicU8::new(0), + AtomicU8::new(0), + AtomicU8::new(0), + AtomicU8::new(0), + AtomicU8::new(0), + AtomicU8::new(0), +]; /// GICR SGI_base registers (at GICR_SGI_OFFSET from RD_base) const GICR_IGROUPR0: usize = 0x080; @@ -1298,6 +1363,106 @@ fn get_cpu_id_from_mpidr() -> usize { (mpidr & 0xFF) as usize } +fn read_mpidr_el1() -> u64 { + let mpidr: u64; + unsafe { + core::arch::asm!("mrs {}, mpidr_el1", out(reg) mpidr, options(nomem, nostack)); + } + mpidr +} + +pub fn dump_gicr_state_for_cpu(target_cpu: usize) { + use crate::arch_impl::aarch64::context_switch::{raw_uart_dec, raw_uart_hex, raw_uart_str}; + + raw_uart_str("[GICR_STATE] cpu="); + raw_uart_dec(target_cpu as u64); + + if !GICR_VALID.load(Ordering::Relaxed) { + raw_uart_str(" unavailable=gicr_not_valid\n"); + return; + } + + let gicr_size = crate::platform_config::gicr_size() as usize; + let max_redists = if gicr_size > 0 { + gicr_size / GICR_FRAME_SIZE + } else { + 0 + }; + if max_redists > 0 && target_cpu >= max_redists { + raw_uart_str(" unavailable=out_of_range max_redists="); + raw_uart_dec(max_redists as u64); + raw_uart_str("\n"); + return; + } + + let cpu_offset = target_cpu * GICR_FRAME_SIZE; + let waker = gicr_read(cpu_offset, GICR_WAKER); + let ctlr = gicr_read(cpu_offset, GICR_CTLR); + let typer_lo = gicr_read(cpu_offset, GICR_TYPER) as u64; + let typer_hi = gicr_read(cpu_offset, GICR_TYPER + 4) as u64; + let typer = typer_lo | (typer_hi << 32); + let syncr = gicr_read(cpu_offset, GICR_SYNCR); + + raw_uart_str(" waker="); + raw_uart_hex(waker as u64); + raw_uart_str(" ctlr="); + raw_uart_hex(ctlr as u64); + raw_uart_str(" typer="); + raw_uart_hex(typer); + raw_uart_str(" syncr="); + raw_uart_hex(syncr as u64); + raw_uart_str("\n"); +} + +fn record_gic_cpu_audit( + cpu_id: usize, + mpidr: u64, + sre: u64, + ctlr: u64, + pmr: u64, + igrpen1: u64, + mismatch: bool, +) { + if cpu_id >= GIC_CPU_AUDIT_SLOTS { + return; + } + + GIC_CPU_AUDIT_MPIDR[cpu_id].store(mpidr, Ordering::Release); + GIC_CPU_AUDIT_SRE[cpu_id].store(sre, Ordering::Release); + GIC_CPU_AUDIT_CTLR[cpu_id].store(ctlr, Ordering::Release); + GIC_CPU_AUDIT_PMR[cpu_id].store(pmr, Ordering::Release); + GIC_CPU_AUDIT_IGRPEN1[cpu_id].store(igrpen1, Ordering::Release); + GIC_CPU_AUDIT_MISMATCH[cpu_id].store(if mismatch { 1 } else { 0 }, Ordering::Release); + GIC_CPU_AUDIT_VALID[cpu_id].store(true, Ordering::Release); +} + +/// Emit CPU-interface audit values captured when each CPU initialized ICC state. +/// +/// Secondary CPU boot.S emits raw UART breadcrumbs before Rust reaches the serial +/// lock, so printing these lines directly from each secondary can corrupt the +/// `[GIC_CPU_AUDIT]` prefix. This CPU0-owned snapshot keeps the required audit +/// rows parseable while preserving init-time register values. +pub fn dump_gic_cpu_audit_snapshot(max_cpus: usize) { + let count = max_cpus.min(GIC_CPU_AUDIT_SLOTS); + for cpu_id in 0..count { + if !GIC_CPU_AUDIT_VALID[cpu_id].load(Ordering::Acquire) { + crate::serial_println!("[GIC_CPU_AUDIT] cpu={} missing=1", cpu_id); + continue; + } + + crate::serial_println!( + "[GIC_CPU_AUDIT] cpu={} mpidr={:#x} sre={:#x} ctlr={:#x} pmr={:#x} igrpen1={:#x} mismatch={}", + cpu_id, + GIC_CPU_AUDIT_MPIDR[cpu_id].load(Ordering::Acquire), + GIC_CPU_AUDIT_SRE[cpu_id].load(Ordering::Acquire), + GIC_CPU_AUDIT_CTLR[cpu_id].load(Ordering::Acquire), + GIC_CPU_AUDIT_PMR[cpu_id].load(Ordering::Acquire), + GIC_CPU_AUDIT_IGRPEN1[cpu_id].load(Ordering::Acquire), + GIC_CPU_AUDIT_MISMATCH[cpu_id].load(Ordering::Acquire) + ); + } +} + /// Initialize GICv3 Distributor (GICD). fn init_gicv3_distributor() { // Disable distributor @@ -1482,22 +1647,6 @@ fn init_gicv3_cpu_interface() { let sre: u64 = 0x7; // SRE | DFB | DIB core::arch::asm!("msr icc_sre_el1, {}", in(reg) sre, options(nomem, nostack)); core::arch::asm!("isb", options(nomem, nostack)); - let sre_readback: u64; - core::arch::asm!("mrs {}, icc_sre_el1", out(reg) sre_readback, options(nomem, nostack)); - { - use crate::arch_impl::aarch64::context_switch::{ - raw_uart_dec, raw_uart_hex, raw_uart_str, - }; - - let cpu_id = get_cpu_id_from_mpidr(); - raw_uart_str("[SRE_AUDIT] cpu="); - raw_uart_dec(cpu_id as u64); - raw_uart_str(" sre="); - raw_uart_dec(sre_readback & 1); - raw_uart_str(" raw="); - raw_uart_hex(sre_readback); - raw_uart_str("\n"); - } // Explicitly set ICC_CTLR_EL1.EOImode = 1 (bit 1) so GICv3 uses split // priority-drop and deactivate semantics. This matches Linux's regular @@ -1542,6 +1691,35 @@ fn init_gicv3_cpu_interface() { } core::arch::asm!("isb", options(nomem, nostack)); + + let sre_readback: u64; + let ctlr_readback: u64; + let pmr_readback: u64; + let igrpen1_readback: u64; + core::arch::asm!("mrs {}, icc_sre_el1", out(reg) sre_readback, options(nomem, nostack)); + core::arch::asm!("mrs {}, icc_ctlr_el1", out(reg) ctlr_readback, options(nomem, nostack)); + core::arch::asm!("mrs {}, icc_pmr_el1", out(reg) pmr_readback, options(nomem, nostack)); + core::arch::asm!("mrs {}, icc_igrpen1_el1", out(reg) igrpen1_readback, options(nomem, nostack)); + { + let cpu_id = get_cpu_id_from_mpidr(); + let mpidr = read_mpidr_el1(); + let sre_enabled = sre_readback & 1; + let igrpen1_enabled = igrpen1_readback & 1; + let mismatch = sre_enabled != LINUX_EXPECTED_SRE_ENABLED + || (ctlr_readback & LINUX_EXPECTED_CTLR_EOIMODE) != LINUX_EXPECTED_CTLR_EOIMODE + || pmr_readback != LINUX_EXPECTED_PMR + || igrpen1_enabled != LINUX_EXPECTED_IGRPEN1_ENABLED; + + record_gic_cpu_audit( + cpu_id, + mpidr, + sre_readback, + ctlr_readback, + pmr_readback, + igrpen1_readback, + mismatch, + ); + } } } @@ -1576,7 +1754,10 @@ pub fn read_gicr_priority_cpu0(irq: u32) -> u8 { let cpu_offset = 0; // CPU 0 let reg_index = (irq / 4) as usize; let byte_index = (irq % 4) as usize; - let reg_val = gicr_read(cpu_offset + GICR_SGI_OFFSET, GICR_IPRIORITYR0 + reg_index * 4); + let reg_val = gicr_read( + cpu_offset + GICR_SGI_OFFSET, + GICR_IPRIORITYR0 + reg_index * 4, + ); ((reg_val >> (byte_index * 8)) & 0xFF) as u8 } diff --git a/kernel/src/drivers/ahci/mod.rs b/kernel/src/drivers/ahci/mod.rs index 43408bea..084bdd03 100644 --- a/kernel/src/drivers/ahci/mod.rs +++ b/kernel/src/drivers/ahci/mod.rs @@ -279,6 +279,9 @@ pub(crate) const AHCI_TRACE_SGI_AFTER_ISB: u32 = 20; pub(crate) const AHCI_TRACE_SGI_EXIT: u32 = 21; pub(crate) const AHCI_TRACE_WAKEBUF_BEFORE_PUSH: u32 = 22; pub(crate) const AHCI_TRACE_WAKEBUF_AFTER_PUSH: u32 = 23; +pub(crate) const AHCI_TRACE_SCAN_START: u32 = 24; +pub(crate) const AHCI_TRACE_SCAN_CPU: u32 = 25; +pub(crate) const AHCI_TRACE_SCAN_DONE: u32 = 26; struct AhciTraceSlot { site: AtomicU32, @@ -437,6 +440,9 @@ fn ahci_trace_site_name(site: u32) -> &'static str { AHCI_TRACE_SGI_EXIT => "SGI_EXIT", AHCI_TRACE_WAKEBUF_BEFORE_PUSH => "WAKEBUF_BEFORE_PUSH", AHCI_TRACE_WAKEBUF_AFTER_PUSH => "WAKEBUF_AFTER_PUSH", + AHCI_TRACE_SCAN_START => "UNBLOCK_SCAN_START", + AHCI_TRACE_SCAN_CPU => "UNBLOCK_SCAN_CPU", + AHCI_TRACE_SCAN_DONE => "UNBLOCK_SCAN_DONE", _ => "UNKNOWN", } } @@ -546,6 +552,54 @@ pub fn dump_recent_ahci_events(port_filter: Option, n: usize) { } } +fn is_sgi_target_trace_site(site: u32) -> bool { + matches!( + site, + AHCI_TRACE_UNBLOCK_PER_SGI + | AHCI_TRACE_SGI_ENTRY + | AHCI_TRACE_SGI_AFTER_MPIDR + | AHCI_TRACE_SGI_AFTER_COMPOSE + | AHCI_TRACE_SGI_BEFORE_MSR + | AHCI_TRACE_SGI_AFTER_MSR + | AHCI_TRACE_SGI_AFTER_ISB + | AHCI_TRACE_SGI_EXIT + ) +} + +pub(crate) fn collect_recent_sgi_targets(out: &mut [u8]) -> usize { + let mut count = 0usize; + + for cpu in 0..AHCI_TRACE_CPUS { + for index in 0..AHCI_TRACE_LEN { + let Some(event) = load_ahci_trace_slot(cpu, index, None) else { + continue; + }; + + if !is_sgi_target_trace_site(event.site) { + continue; + } + + let target = event.slot_mask as usize; + if target >= AHCI_TRACE_CPUS { + continue; + } + + if out[..count].iter().any(|seen| *seen as usize == target) { + continue; + } + + if count == out.len() { + return count; + } + + out[count] = target as u8; + count += 1; + } + } + + count +} + pub fn port0_is_snapshot() -> Option { port_is_snapshot(0) } @@ -1632,9 +1686,8 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { #[cfg(target_arch = "aarch64")] { use crate::arch_impl::aarch64::timer_interrupt::{ - CPU0_LAST_TIMER_ELR, CPU0_BREADCRUMB_ID, CPU0_BREADCRUMB_CTL, CPU0_BREADCRUMB_SP, - CPU0_BREADCRUMB_ELR_SLOT, - CPU0_DISPATCH_TID, CPU0_DISPATCH_ELR, CPU0_DISPATCH_SPSR, + CPU0_BREADCRUMB_CTL, CPU0_BREADCRUMB_ELR_SLOT, CPU0_BREADCRUMB_ID, CPU0_BREADCRUMB_SP, + CPU0_DISPATCH_ELR, CPU0_DISPATCH_SPSR, CPU0_DISPATCH_TID, CPU0_LAST_TIMER_ELR, }; crate::serial_println!( "[ahci] cpu0_last_timer_elr={:#x} cpu0_breadcrumb={} ctl={:#x} sp={:#x} elr_slot={:#x}", @@ -1674,7 +1727,9 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { } crate::serial_println!( "[ahci] IGRPEN1={:#x} ICC_CTLR={:#x} BPR1={:#x}", - igrpen1, icc_ctlr, bpr1, + igrpen1, + icc_ctlr, + bpr1, ); // Read GICD_ISENABLER1 to confirm SPI34 is still enabled at distributor #[cfg(target_arch = "aarch64")] @@ -1685,7 +1740,8 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { let spi34_enabled = (gicd_isenabler1 >> 2) & 1; crate::serial_println!( "[ahci] GICD_ISENABLER1={:#x} SPI34_enabled={}", - gicd_isenabler1, spi34_enabled, + gicd_isenabler1, + spi34_enabled, ); } // Read CPU 0's GICR PPI enable and pending state (redistributor-level) @@ -1695,13 +1751,15 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { let ppi27_enabled = (cpu0_ppi_enable >> 27) & 1; crate::serial_println!( "[ahci] CPU0_GICR_ISENABLER0={:#010x} PPI27_enabled={}", - cpu0_ppi_enable, ppi27_enabled, + cpu0_ppi_enable, + ppi27_enabled, ); let cpu0_ppi_pending = crate::arch_impl::aarch64::gic::read_gicr_ispendr0(0); let ppi27_pending = (cpu0_ppi_pending >> 27) & 1; crate::serial_println!( "[ahci] CPU0_GICR_ISPENDR0={:#010x} PPI27_pending={}", - cpu0_ppi_pending, ppi27_pending, + cpu0_ppi_pending, + ppi27_pending, ); } #[cfg(target_arch = "aarch64")] @@ -1714,7 +1772,8 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { }; crate::serial_println!( "[ahci] PPI27_priority={:#x} SPI34_priority={:#x} PMR=0xf8", - ppi27_priority, spi34_priority, + ppi27_priority, + spi34_priority, ); } // Per-CPU SPSR_EL1 snapshots from the timer tick handler. @@ -1724,7 +1783,7 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { // TIMER_TICK_COUNT detects if a CPU stopped receiving timer interrupts entirely. #[cfg(target_arch = "aarch64")] { - use crate::arch_impl::aarch64::timer_interrupt::{TIMER_TICK_DAIF, TIMER_TICK_COUNT}; + use crate::arch_impl::aarch64::timer_interrupt::{TIMER_TICK_COUNT, TIMER_TICK_DAIF}; crate::serial_println!( "[ahci] timeout_cpu={} cpu_spsr=[{:#x},{:#x},{:#x},{:#x},{:#x},{:#x},{:#x},{:#x}]", timeout_cpu, @@ -1748,7 +1807,7 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { TIMER_TICK_COUNT[6].load(Ordering::Relaxed), TIMER_TICK_COUNT[7].load(Ordering::Relaxed), ); - use crate::arch_impl::aarch64::timer_interrupt::{TIMER_TICK_HW_CPU, TIMER_TICK_HW_COUNT}; + use crate::arch_impl::aarch64::timer_interrupt::{TIMER_TICK_HW_COUNT, TIMER_TICK_HW_CPU}; crate::serial_println!( "[ahci] hw_tick_count=[{},{},{},{},{},{},{},{}]", TIMER_TICK_HW_COUNT[0].load(Ordering::Relaxed), @@ -1807,7 +1866,7 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { #[cfg(target_arch = "aarch64")] { use crate::arch_impl::aarch64::exception::{ - SYNC_EXCEPTION_COUNT, CPU0_LAST_SYNC_ESR, CPU0_LAST_SYNC_FAR, CPU0_LAST_SYNC_ELR, + CPU0_LAST_SYNC_ELR, CPU0_LAST_SYNC_ESR, CPU0_LAST_SYNC_FAR, SYNC_EXCEPTION_COUNT, }; crate::serial_println!( "[ahci] sync_exc_count=[{},{},{},{},{},{},{},{}]", @@ -1841,18 +1900,23 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { } crate::serial_println!( "[ahci] timer: CNTV_CTL={:#x} CNTV_TVAL={} CNTP_CTL={:#x} CNTP_TVAL={} CNTFRQ={}", - cntv_ctl, cntv_tval as i64, cntp_ctl, cntp_tval as i64, cntfrq, + cntv_ctl, + cntv_tval as i64, + cntp_ctl, + cntp_tval as i64, + cntfrq, ); crate::serial_println!( "[ahci] timer: TICKS_PER_INTERRUPT={}", - crate::arch_impl::aarch64::timer_interrupt::TICKS_PER_INTERRUPT.load(core::sync::atomic::Ordering::Relaxed), + crate::arch_impl::aarch64::timer_interrupt::TICKS_PER_INTERRUPT + .load(core::sync::atomic::Ordering::Relaxed), ); } // CPU 0's last CVAL and CNTVCT from the timer handler, plus current CNTVCT. // delta = cntvct_now - cval: positive = timer expired long ago, negative = CVAL in future. #[cfg(target_arch = "aarch64")] { - use crate::arch_impl::aarch64::timer_interrupt::{CPU0_LAST_CVAL, CPU0_LAST_CNTVCT}; + use crate::arch_impl::aarch64::timer_interrupt::{CPU0_LAST_CNTVCT, CPU0_LAST_CVAL}; // Read current CNTVCT (shared across all CPUs, so valid from any CPU) let current_cntvct: u64; unsafe { @@ -1875,9 +1939,8 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { #[cfg(target_arch = "aarch64")] { use crate::arch_impl::aarch64::context_switch::{ - CPU0_IDLE_DAIF, CPU0_IDLE_PMR, CPU0_IDLE_IGRPEN1, - CPU0_IDLE_CNTV_CTL, CPU0_IDLE_CNTV_CVAL, CPU0_IDLE_CNTVCT, - CPU0_IDLE_ITERATIONS, + CPU0_IDLE_CNTVCT, CPU0_IDLE_CNTV_CTL, CPU0_IDLE_CNTV_CVAL, CPU0_IDLE_DAIF, + CPU0_IDLE_IGRPEN1, CPU0_IDLE_ITERATIONS, CPU0_IDLE_PMR, }; let cval = CPU0_IDLE_CNTV_CVAL.load(Ordering::Relaxed); let cntvct = CPU0_IDLE_CNTVCT.load(Ordering::Relaxed); @@ -1891,7 +1954,8 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { ); crate::serial_println!( "[ahci] cpu0_idle: cval={} cntvct={} delta={} ({}ms)", - cval, cntvct, + cval, + cntvct, cntvct.wrapping_sub(cval) as i64, cntvct.wrapping_sub(cval) / 24000, ); @@ -1899,10 +1963,7 @@ fn dump_timeout_state_free(port: usize, cmd_num: u32) { crate::serial_println!("[ahci] dumping trace buffer cpu0"); crate::tracing::dump_buffer(0); if timeout_cpu != 0 { - crate::serial_println!( - "[ahci] dumping trace buffer timeout_cpu={}", - timeout_cpu - ); + crate::serial_println!("[ahci] dumping trace buffer timeout_cpu={}", timeout_cpu); crate::tracing::dump_buffer(timeout_cpu as usize); } } diff --git a/kernel/src/main_aarch64.rs b/kernel/src/main_aarch64.rs index b77eaaad..69a41ad7 100644 --- a/kernel/src/main_aarch64.rs +++ b/kernel/src/main_aarch64.rs @@ -958,6 +958,9 @@ pub extern "C" fn kernel_main(hw_config_ptr: u64) -> ! { "[smp] {} CPUs online", kernel::arch_impl::aarch64::smp::cpus_online() ); + kernel::arch_impl::aarch64::gic::dump_gic_cpu_audit_snapshot( + kernel::arch_impl::aarch64::smp::cpus_online() as usize, + ); } // Test kthread lifecycle BEFORE creating userspace processes diff --git a/kernel/src/task/scheduler.rs b/kernel/src/task/scheduler.rs index 645043d7..0adb258c 100644 --- a/kernel/src/task/scheduler.rs +++ b/kernel/src/task/scheduler.rs @@ -96,12 +96,7 @@ impl IsrWakeupBuffer { fn push(&self, tid: u64) -> bool { for slot in &self.slots { if slot - .compare_exchange( - ISR_WAKEUP_EMPTY, - tid, - Ordering::AcqRel, - Ordering::Relaxed, - ) + .compare_exchange(ISR_WAKEUP_EMPTY, tid, Ordering::AcqRel, Ordering::Relaxed) .is_ok() { return true; @@ -121,8 +116,7 @@ impl IsrWakeupBuffer { } } -static ISR_WAKEUP_BUFFERS: [IsrWakeupBuffer; 8] = - [const { IsrWakeupBuffer::new() }; 8]; +static ISR_WAKEUP_BUFFERS: [IsrWakeupBuffer; 8] = [const { IsrWakeupBuffer::new() }; 8]; /// Global scheduler instance static SCHEDULER: Mutex> = Mutex::new(None); @@ -300,32 +294,32 @@ pub fn try_dump_state() -> Option { let (elr_el1, x30, sp) = (0, 0, t.context.rsp); ThreadDumpEntry { - id: t.id(), - state: match t.state { - ThreadState::Ready => 0, - ThreadState::Running => 1, - ThreadState::Blocked => 2, - ThreadState::BlockedOnSignal => 3, - ThreadState::BlockedOnChildExit => 4, - ThreadState::BlockedOnTimer => 5, - ThreadState::BlockedOnIO => 7, - ThreadState::Terminated => 6, - }, - blocked_in_syscall: t.blocked_in_syscall, - saved_by_inline_schedule: t.saved_by_inline_schedule, - inline_schedule_caller_lr: t.inline_schedule_caller_lr, - inline_schedule_saved_sp: t.inline_schedule_saved_sp, - has_wake_time: t.wake_time_ns.is_some(), - privilege: if t.privilege == super::thread::ThreadPrivilege::Kernel { - 0 - } else { - 1 - }, - owner_pid: t.owner_pid.unwrap_or(0), - elr_el1, - x30, - sp, - } + id: t.id(), + state: match t.state { + ThreadState::Ready => 0, + ThreadState::Running => 1, + ThreadState::Blocked => 2, + ThreadState::BlockedOnSignal => 3, + ThreadState::BlockedOnChildExit => 4, + ThreadState::BlockedOnTimer => 5, + ThreadState::BlockedOnIO => 7, + ThreadState::Terminated => 6, + }, + blocked_in_syscall: t.blocked_in_syscall, + saved_by_inline_schedule: t.saved_by_inline_schedule, + inline_schedule_caller_lr: t.inline_schedule_caller_lr, + inline_schedule_saved_sp: t.inline_schedule_saved_sp, + has_wake_time: t.wake_time_ns.is_some(), + privilege: if t.privilege == super::thread::ThreadPrivilege::Kernel { + 0 + } else { + 1 + }, + owner_pid: t.owner_pid.unwrap_or(0), + elr_el1, + x30, + sp, + } }) .collect(); @@ -1276,7 +1270,11 @@ impl Scheduler { self.per_cpu_queues[target].push_back(thread_id); // CRITICAL: Only log on x86_64 to avoid deadlock on ARM64 #[cfg(target_arch = "x86_64")] - log_serial_println!("unblock({}): Added to per_cpu_queues[{}]", thread_id, target); + log_serial_println!( + "unblock({}): Added to per_cpu_queues[{}]", + thread_id, + target + ); // Send IPI to wake an idle CPU so it can pick up the unblocked thread #[cfg(target_arch = "aarch64")] @@ -1551,7 +1549,11 @@ impl Scheduler { self.per_cpu_queues[target].push_back(thread_id); // CRITICAL: Only log on x86_64 to avoid deadlock on ARM64 #[cfg(target_arch = "x86_64")] - log_serial_println!("Thread {} unblocked by child exit, queued to cpu {}", thread_id, target); + log_serial_println!( + "Thread {} unblocked by child exit, queued to cpu {}", + thread_id, + target + ); // Send IPI to wake an idle CPU #[cfg(target_arch = "aarch64")] @@ -1740,8 +1742,7 @@ impl Scheduler { // wake_time set. let is_timed_wait = if let Some(thread) = self.get_thread(tid) { (matches!(thread.state, ThreadState::BlockedOnTimer) - || (thread.state == ThreadState::BlockedOnIO - && thread.wake_time_ns.is_some())) + || (thread.state == ThreadState::BlockedOnIO && thread.wake_time_ns.is_some())) && thread.wake_time_ns.is_some() } else { false @@ -2613,6 +2614,18 @@ pub fn isr_unblock_for_io(tid: u64) { // Send IPI to idle CPUs so the buffer is drained promptly. #[cfg(target_arch = "aarch64")] { + crate::drivers::ahci::push_ahci_event( + crate::drivers::ahci::AHCI_TRACE_SCAN_START, + 0, + 0, + 0, + 0, + 0, + tid, + 0, + 0, + false, + ); let online = crate::arch_impl::aarch64::smp::cpus_online() as usize; crate::drivers::ahci::push_ahci_event( crate::drivers::ahci::AHCI_TRACE_UNBLOCK_BEFORE_SGI_SCAN, @@ -2627,6 +2640,18 @@ pub fn isr_unblock_for_io(tid: u64) { false, ); for target in 0..online.min(MAX_CPUS) { + crate::drivers::ahci::push_ahci_event( + crate::drivers::ahci::AHCI_TRACE_SCAN_CPU, + 0, + 0, + 0, + 0, + 0, + tid, + target as u32, + 0, + false, + ); if target != cpu && is_cpu_idle(target) { crate::drivers::ahci::push_ahci_event( crate::drivers::ahci::AHCI_TRACE_UNBLOCK_PER_SGI, @@ -2646,6 +2671,18 @@ pub fn isr_unblock_for_io(tid: u64) { ); } } + crate::drivers::ahci::push_ahci_event( + crate::drivers::ahci::AHCI_TRACE_SCAN_DONE, + 0, + 0, + 0, + 0, + 0, + tid, + 0, + 0, + false, + ); } #[cfg(target_arch = "aarch64")] crate::drivers::ahci::push_ahci_event(