# Linux cpu_cooling热电冷却与频率限制（cpufreq_cooling）

Linux thermal框架的`cpu_cooling`子系统通过`cpufreq_cooling`实现基于频率调整的CPU热冷却机制。核心结构体是`cpufreq_cooling_device`，它将cpufreq频率调节能力抽象为thermal的冷却设备（cooling device）。

`cpufreq_cooling`的注册入口是`cpufreq_cooling_register()`，接收`cpufreq_policy`参数，返回`struct thermal_cooling_device`指针：

```c
struct thermal_cooling_device *cpufreq_cooling_register(struct cpufreq_policy *policy)
{
    struct cpufreq_cooling_device *cpufreq_cdev;
    struct thermal_cooling_device *cdev;
    int ret;

    // 分配cpufreq冷却设备结构体
    cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
    if (!cpufreq_cdev)
        return ERR_PTR(-ENOMEM);

    // 获取CPU频率表并初始化OPP
    cpufreq_cdev->policy = policy;
    cpufreq_cdev->max_level = cpufreq_cdev->freq_table->size - 1;

    // 注册为热冷却设备
    cdev = thermal_cooling_device_register("cpufreq", cpufreq_cdev,
                                           &cpufreq_cooling_ops);
    if (IS_ERR(cdev)) {
        kfree(cpufreq_cdev);
        return cdev;
    }

    cpufreq_cdev->cdev = cdev;
    return cdev;
}
```

`cpufreq_cooling_ops`定义了冷却设备的三个核心操作：

```c
static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
    .get_max_state = cpufreq_get_max_state, // 获取最大冷却级别
    .get_cur_state = cpufreq_get_cur_state, // 获取当前冷却级别
    .set_cur_state = cpufreq_set_cur_state, // 设置冷却级别
};
```

`set_cur_state`是频率限制的关键函数，它根据传入的冷却级别（cooling state）映射到对应的CPU频率上限：

```c
static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
                                 unsigned long state)
{
    struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
    struct cpufreq_policy *policy = cpufreq_cdev->policy;
    unsigned int clip_freq;

    // 状态0表示无冷却限制，使用最高频率
    // 状态值越大，冷却越强，频率越低
    if (state == 0) {
        clip_freq = policy->cpuinfo.max_freq;
    } else {
        // 根据冷却级别查表得到限制频率
        // state=1限制到次高频率，state=max_level限制到最低频率
        int index = cpufreq_cdev->max_level - state;
        clip_freq = cpufreq_cdev->freq_table[index].frequency;
    }

    // 通过cpufreq设置新的频率上限
    // 防止CPU频率超过clip_freq
    cpufreq_verify_within_limits(policy,
                                 policy->cpuinfo.min_freq, clip_freq);

    // 触发频率调节
    cpufreq_update_policy(policy->cpu);

    // 更新当前状态计数器
    cpufreq_cdev->cpufreq_state = state;
    return 0;
}
```

冷却级别与CPU频率的映射关系由频率表定义。每个冷却级别对应一个最大允许频率：级别0无限制，最高级别将CPU锁定在最低频率。

```c
static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
                                 unsigned long *state)
{
    struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

    // 最大状态数 = 可用频率级别数 - 1
    *state = cpufreq_cdev->max_level;
    return 0;
}

static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
                                 unsigned long *state)
{
    struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;

    // 返回当前冷却级别
    *state = cpufreq_cdev->cpufreq_state;
    return 0;
}
```

`cpufreq_cooling`还提供了power-aware接口，通过`cpufreq_power2state()`将功耗需求转换为对应的冷却级别。power_allocator governor使用该接口实现PID控制器驱动的精确功率控制：

```c
int cpufreq_power2state(struct thermal_cooling_device *cdev,
                        struct em_perf_state *table,
                        u32 power, unsigned long *state)
{
    struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
    int i;
    u32 freq, power_at_freq;

    // 从功耗模型中查找对应给定功率的最高频率
    for (i = cpufreq_cdev->max_level; i > 0; i--) {
        freq = cpufreq_cdev->freq_table[i].frequency;
        power_at_freq = table[i].power;
        if (power_at_freq <= power)
            break;
    }

    // 转换为冷却状态
    *state = cpufreq_cdev->max_level - i;
    return 0;
}
```

`cpufreq_cooling`与thermal governor的联动：当thermal_zone温度超过passive trip点时，governor调用`set_cur_state`逐步降低频率，温度回落后逐步升高频率。频率的实际调节由cpufreq驱动完成：`cpufreq_cooling`只设置上限，实际频率由cpufreq governor（如schedutil）在限制范围内选择。

`cpufreq_cooling_unregister()`在模块卸载时调用，清理频率表和冷却设备。在ARM平台上，`cpu_cooling`通常与cpufreq驱动（如cpufreq-dt）配合使用，通过设备树的thermal-zones节点配置频率冷却映射关系。