From a3125be955df9dcbbf1062c606797cf839f8a955 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Fri, 21 Mar 2025 19:42:07 -0700 Subject: [PATCH] power: Refactor TDP limiting to allow for different backends --- data/platforms/jupiter.toml | 3 + src/manager/root.rs | 16 +++- src/manager/user.rs | 40 +++++++--- src/platform.rs | 24 +++++- src/power.rs | 151 ++++++++++++++++++++++++------------ 5 files changed, 167 insertions(+), 67 deletions(-) diff --git a/data/platforms/jupiter.toml b/data/platforms/jupiter.toml index fd43cc5..f7981c9 100644 --- a/data/platforms/jupiter.toml +++ b/data/platforms/jupiter.toml @@ -30,6 +30,9 @@ no_validate_flag = "--skip-validation" systemd = "jupiter-fan-control.service" [tdp_limit] +method = "gpu_hwmon" + +[tdp_limit.range] min = 3 max = 15 diff --git a/src/manager/root.rs b/src/manager/root.rs index ff02d15..619b57a 100644 --- a/src/manager/root.rs +++ b/src/manager/root.rs @@ -27,8 +27,8 @@ use crate::job::JobManager; use crate::platform::platform_config; use crate::power::{ set_cpu_scaling_governor, set_gpu_clocks, set_gpu_performance_level, set_gpu_power_profile, - set_max_charge_level, set_platform_profile, set_tdp_limit, CPUScalingGovernor, - GPUPerformanceLevel, GPUPowerProfile, + set_max_charge_level, set_platform_profile, tdp_limit_manager, CPUScalingGovernor, + GPUPerformanceLevel, GPUPowerProfile, TdpLimitManager, }; use crate::process::{run_script, script_output}; use crate::wifi::{ @@ -70,6 +70,7 @@ pub struct SteamOSManager { channel: Sender, wifi_debug_mode: WifiDebugMode, fan_control: FanControl, + tdp_limit_manager: Option>, // Whether we should use trace-cmd or not. // True on galileo devices, false otherwise should_trace: bool, @@ -81,6 +82,7 @@ impl SteamOSManager { Ok(SteamOSManager { fan_control: FanControl::new(connection.clone()), wifi_debug_mode: WifiDebugMode::Off, + tdp_limit_manager: tdp_limit_manager().await.ok(), should_trace: steam_deck_variant().await? == SteamDeckVariant::Galileo, job_manager: JobManager::new(connection.clone()).await?, connection, @@ -301,7 +303,15 @@ impl SteamOSManager { } async fn set_tdp_limit(&self, limit: u32) -> fdo::Result<()> { - set_tdp_limit(limit).await.map_err(to_zbus_fdo_error) + let Some(ref manager) = self.tdp_limit_manager else { + return Err(fdo::Error::Failed(String::from( + "TDP limiting not configured", + ))); + }; + manager + .set_tdp_limit(limit) + .await + .map_err(to_zbus_fdo_error) } #[zbus(property)] diff --git a/src/manager/user.rs b/src/manager/user.rs index 839b4d9..4663c26 100644 --- a/src/manager/user.rs +++ b/src/manager/user.rs @@ -28,7 +28,7 @@ use crate::power::{ get_available_cpu_scaling_governors, get_available_gpu_performance_levels, get_available_gpu_power_profiles, get_available_platform_profiles, get_cpu_scaling_governor, get_gpu_clocks, get_gpu_clocks_range, get_gpu_performance_level, get_gpu_power_profile, - get_max_charge_level, get_platform_profile, get_tdp_limit, get_tdp_limit_range, + get_max_charge_level, get_platform_profile, tdp_limit_manager, TdpLimitManager, }; use crate::wifi::{ get_wifi_backend, get_wifi_power_management_state, list_wifi_interfaces, WifiBackend, @@ -132,6 +132,7 @@ struct GpuPowerProfile1 { struct TdpLimit1 { proxy: Proxy<'static>, + manager: Box, } struct HdmiCec1 { @@ -528,7 +529,7 @@ impl Storage1 { impl TdpLimit1 { #[zbus(property(emits_changed_signal = "false"))] async fn tdp_limit(&self) -> u32 { - get_tdp_limit().await.unwrap_or(0) + self.manager.get_tdp_limit().await.unwrap_or(0) } #[zbus(property)] @@ -538,12 +539,20 @@ impl TdpLimit1 { #[zbus(property(emits_changed_signal = "const"))] async fn tdp_limit_min(&self) -> u32 { - get_tdp_limit_range().await.map(|r| *r.start()).unwrap_or(0) + self.manager + .get_tdp_limit_range() + .await + .map(|r| *r.start()) + .unwrap_or(0) } #[zbus(property(emits_changed_signal = "const"))] async fn tdp_limit_max(&self) -> u32 { - get_tdp_limit_range().await.map(|r| *r.end()).unwrap_or(0) + self.manager + .get_tdp_limit_range() + .await + .map(|r| *r.end()) + .unwrap_or(0) } } @@ -741,9 +750,6 @@ pub(crate) async fn create_interfaces( proxy: proxy.clone(), channel: daemon, }; - let tdp_limit = TdpLimit1 { - proxy: proxy.clone(), - }; let wifi_debug = WifiDebug1 { proxy: proxy.clone(), }; @@ -796,8 +802,16 @@ pub(crate) async fn create_interfaces( object_server.at(MANAGER_PATH, manager2).await?; - if get_tdp_limit().await.is_ok() { - object_server.at(MANAGER_PATH, tdp_limit).await?; + if let Ok(manager) = tdp_limit_manager().await { + object_server + .at( + MANAGER_PATH, + TdpLimit1 { + proxy: proxy.clone(), + manager, + }, + ) + .await?; } if steam_deck_variant().await.unwrap_or_default() == SteamDeckVariant::Galileo { @@ -822,8 +836,9 @@ mod test { use crate::hardware::SteamDeckVariant; use crate::platform::{ BatteryChargeLimitConfig, PerformanceProfileConfig, PlatformConfig, RangeConfig, - ResetConfig, ScriptConfig, ServiceConfig, StorageConfig, + ResetConfig, ScriptConfig, ServiceConfig, StorageConfig, TdpLimitConfig, }; + use crate::power::TdpLimitingMethod; use crate::systemd::test::{MockManager, MockUnit}; use crate::{path, power, testing}; @@ -849,7 +864,10 @@ mod test { fan_control: Some(ServiceConfig::Systemd(String::from( "jupiter-fan-control.service", ))), - tdp_limit: Some(RangeConfig::new(3, 15)), + tdp_limit: Some(TdpLimitConfig { + method: TdpLimitingMethod::GpuHwmon, + range: Some(RangeConfig::new(3, 15)), + }), gpu_clocks: Some(RangeConfig::new(200, 1600)), battery_charge_limit: Some(BatteryChargeLimitConfig { suggested_minimum_limit: Some(10), diff --git a/src/platform.rs b/src/platform.rs index 5c285ec..c96872d 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -8,10 +8,12 @@ use anyhow::Result; use nix::errno::Errno; use nix::unistd::{access, AccessFlags}; -use serde::Deserialize; +use serde::de::Error; +use serde::{Deserialize, Deserializer}; use std::io::ErrorKind; use std::os::unix::fs::MetadataExt; use std::path::PathBuf; +use strum::VariantNames; use tokio::fs::{metadata, read_to_string}; #[cfg(not(test))] use tokio::sync::OnceCell; @@ -19,6 +21,7 @@ use tokio::task::spawn_blocking; #[cfg(not(test))] use crate::hardware::{device_type, DeviceType}; +use crate::power::TdpLimitingMethod; #[cfg(not(test))] static CONFIG: OnceCell> = OnceCell::const_new(); @@ -31,7 +34,7 @@ pub(crate) struct PlatformConfig { pub update_dock: Option, pub storage: Option, pub fan_control: Option, - pub tdp_limit: Option>, + pub tdp_limit: Option, pub gpu_clocks: Option>, pub battery_charge_limit: Option, pub performance_profile: Option, @@ -120,6 +123,13 @@ pub(crate) struct PerformanceProfileConfig { pub platform_profile_name: String, } +#[derive(Clone, Deserialize, Debug)] +pub(crate) struct TdpLimitConfig { + #[serde(deserialize_with = "de_tdp_limiter_method")] + pub method: TdpLimitingMethod, + pub range: Option>, +} + #[derive(Clone, Default, Deserialize, Debug)] pub(crate) struct FormatDeviceConfig { pub script: PathBuf, @@ -171,6 +181,16 @@ impl PlatformConfig { } } +fn de_tdp_limiter_method<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, + D::Error: Error, +{ + let string = String::deserialize(deserializer)?; + TdpLimitingMethod::try_from(string.as_str()) + .map_err(|_| D::Error::unknown_variant(string.as_str(), TdpLimitingMethod::VARIANTS)) +} + #[cfg(not(test))] pub(crate) async fn platform_config() -> Result<&'static Option> { CONFIG.get_or_try_init(PlatformConfig::load).await diff --git a/src/power.rs b/src/power.rs index 6d20643..b5821e9 100644 --- a/src/power.rs +++ b/src/power.rs @@ -6,13 +6,14 @@ */ use anyhow::{anyhow, bail, ensure, Result}; +use async_trait::async_trait; use lazy_static::lazy_static; use num_enum::TryFromPrimitive; use regex::Regex; use std::ops::RangeInclusive; use std::path::{Path, PathBuf}; use std::str::FromStr; -use strum::{Display, EnumString}; +use strum::{Display, EnumString, VariantNames}; use tokio::fs::{self, try_exists, File}; use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; use tracing::{error, warn}; @@ -88,6 +89,34 @@ pub enum CPUScalingGovernor { SchedUtil, } +#[derive(Display, EnumString, VariantNames, PartialEq, Debug, Copy, Clone)] +#[strum(serialize_all = "snake_case")] +pub enum TdpLimitingMethod { + GpuHwmon, +} + +#[derive(Debug)] +pub(crate) struct GpuHwmonTdpLimitManager {} + +#[async_trait] +pub(crate) trait TdpLimitManager: Send + Sync { + async fn get_tdp_limit(&self) -> Result; + async fn set_tdp_limit(&self, limit: u32) -> Result<()>; + async fn get_tdp_limit_range(&self) -> Result>; +} + +pub(crate) async fn tdp_limit_manager() -> Result> { + let config = platform_config().await?; + let config = config + .as_ref() + .and_then(|config| config.tdp_limit.as_ref()) + .ok_or(anyhow!("No TDP limit configured"))?; + + Ok(match config.method { + TdpLimitingMethod::GpuHwmon => Box::new(GpuHwmonTdpLimitManager {}), + }) +} + async fn read_gpu_sysfs_contents>(suffix: S) -> Result { // Read a given suffix for the GPU let base = find_hwmon(GPU_HWMON_NAME).await?; @@ -374,44 +403,52 @@ async fn find_platform_profile(name: &str) -> Result { find_sysdir(path(PLATFORM_PROFILE_PREFIX), name).await } -pub(crate) async fn get_tdp_limit() -> Result { - let base = find_hwmon(GPU_HWMON_NAME).await?; - let power1cap = fs::read_to_string(base.join(TDP_LIMIT1)).await?; - let power1cap: u32 = power1cap.trim_end().parse()?; - Ok(power1cap / 1_000_000) -} - -pub(crate) async fn set_tdp_limit(limit: u32) -> Result<()> { - ensure!( - get_tdp_limit_range().await?.contains(&limit), - "Invalid limit" - ); - let data = format!("{limit}000000"); - - let base = find_hwmon(GPU_HWMON_NAME).await?; - write_synced(base.join(TDP_LIMIT1), data.as_bytes()) - .await - .inspect_err(|message| { - error!("Error opening sysfs power1_cap file for writing TDP limits {message}"); - })?; - - if let Ok(mut power2file) = File::create(base.join(TDP_LIMIT2)).await { - power2file - .write(data.as_bytes()) - .await - .inspect_err(|message| error!("Error writing to power2_cap file: {message}"))?; - power2file.flush().await?; +#[async_trait] +impl TdpLimitManager for GpuHwmonTdpLimitManager { + async fn get_tdp_limit(&self) -> Result { + let base = find_hwmon(GPU_HWMON_NAME).await?; + let power1cap = fs::read_to_string(base.join(TDP_LIMIT1)).await?; + let power1cap: u32 = power1cap.trim_end().parse()?; + Ok(power1cap / 1_000_000) } - Ok(()) -} -pub(crate) async fn get_tdp_limit_range() -> Result> { - let range = platform_config() - .await? - .as_ref() - .and_then(|config| config.tdp_limit) - .ok_or(anyhow!("No TDP limit range configured"))?; - Ok(range.min..=range.max) + async fn set_tdp_limit(&self, limit: u32) -> Result<()> { + ensure!( + self.get_tdp_limit_range().await?.contains(&limit), + "Invalid limit" + ); + + let data = format!("{limit}000000"); + + let base = find_hwmon(GPU_HWMON_NAME).await?; + write_synced(base.join(TDP_LIMIT1), data.as_bytes()) + .await + .inspect_err(|message| { + error!("Error opening sysfs power1_cap file for writing TDP limits {message}"); + })?; + + if let Ok(mut power2file) = File::create(base.join(TDP_LIMIT2)).await { + power2file + .write(data.as_bytes()) + .await + .inspect_err(|message| error!("Error writing to power2_cap file: {message}"))?; + power2file.flush().await?; + } + Ok(()) + } + + async fn get_tdp_limit_range(&self) -> Result> { + let config = platform_config().await?; + let config = config + .as_ref() + .and_then(|config| config.tdp_limit.as_ref()) + .ok_or(anyhow!("No TDP limit configured"))?; + + if let Some(range) = config.range { + return Ok(range.min..=range.max); + } + bail!("No TDP limit range configured"); + } } pub(crate) async fn get_max_charge_level() -> Result { @@ -477,7 +514,7 @@ pub(crate) mod test { use super::*; use crate::hardware::test::fake_model; use crate::hardware::SteamDeckVariant; - use crate::platform::{BatteryChargeLimitConfig, PlatformConfig, RangeConfig}; + use crate::platform::{BatteryChargeLimitConfig, PlatformConfig, RangeConfig, TdpLimitConfig}; use crate::{enum_roundtrip, testing}; use anyhow::anyhow; use tokio::fs::{create_dir_all, read_to_string, remove_dir, write}; @@ -651,41 +688,53 @@ CCLK_RANGE in Core0: } #[tokio::test] - async fn test_get_tdp_limit() { - let _h = testing::start(); + async fn test_gpu_hwmon_get_tdp_limit() { + let handle = testing::start(); + + let mut platform_config = PlatformConfig::default(); + platform_config.tdp_limit = Some(TdpLimitConfig { + method: TdpLimitingMethod::GpuHwmon, + range: Some(RangeConfig { min: 3, max: 15 }), + }); + handle.test.platform_config.replace(Some(platform_config)); + let manager = tdp_limit_manager().await.unwrap(); setup().await.expect("setup"); let hwmon = path(HWMON_PREFIX); - assert!(get_tdp_limit().await.is_err()); + assert!(manager.get_tdp_limit().await.is_err()); write(hwmon.join("hwmon5").join(TDP_LIMIT1), "15000000\n") .await .expect("write"); - assert_eq!(get_tdp_limit().await.unwrap(), 15); + assert_eq!(manager.get_tdp_limit().await.unwrap(), 15); } #[tokio::test] - async fn test_set_tdp_limit() { + async fn test_gpu_hwmon_set_tdp_limit() { let handle = testing::start(); let mut platform_config = PlatformConfig::default(); - platform_config.tdp_limit = Some(RangeConfig { min: 3, max: 15 }); + platform_config.tdp_limit = Some(TdpLimitConfig { + method: TdpLimitingMethod::GpuHwmon, + range: Some(RangeConfig { min: 3, max: 15 }), + }); handle.test.platform_config.replace(Some(platform_config)); + let manager = tdp_limit_manager().await.unwrap(); assert_eq!( - set_tdp_limit(2).await.unwrap_err().to_string(), + manager.set_tdp_limit(2).await.unwrap_err().to_string(), anyhow!("Invalid limit").to_string() ); assert_eq!( - set_tdp_limit(20).await.unwrap_err().to_string(), + manager.set_tdp_limit(20).await.unwrap_err().to_string(), anyhow!("Invalid limit").to_string() ); - assert!(set_tdp_limit(10).await.is_err()); + assert!(manager.set_tdp_limit(10).await.is_err()); let hwmon = path(HWMON_PREFIX); assert_eq!( - set_tdp_limit(10).await.unwrap_err().to_string(), + manager.set_tdp_limit(10).await.unwrap_err().to_string(), anyhow!("No such file or directory (os error 2)").to_string() ); @@ -698,7 +747,7 @@ CCLK_RANGE in Core0: .await .expect("create_dir_all"); assert_eq!( - set_tdp_limit(10).await.unwrap_err().to_string(), + manager.set_tdp_limit(10).await.unwrap_err().to_string(), anyhow!("Is a directory (os error 21)").to_string() ); @@ -706,7 +755,7 @@ CCLK_RANGE in Core0: .await .expect("remove_dir"); write(hwmon.join(TDP_LIMIT1), "0").await.expect("write"); - assert!(set_tdp_limit(10).await.is_ok()); + assert!(manager.set_tdp_limit(10).await.is_ok()); let power1_cap = read_to_string(hwmon.join(TDP_LIMIT1)) .await .expect("power1_cap"); @@ -716,7 +765,7 @@ CCLK_RANGE in Core0: .await .expect("remove_dir"); write(hwmon.join(TDP_LIMIT2), "0").await.expect("write"); - assert!(set_tdp_limit(15).await.is_ok()); + assert!(manager.set_tdp_limit(15).await.is_ok()); let power1_cap = read_to_string(hwmon.join(TDP_LIMIT1)) .await .expect("power1_cap");