
# Linux irq_poll忙轮询中断模式与cap_poll适配

irq_poll是Linux内核提供的一种中断与轮询混合机制，用于在高中断频率场景下降低中断开销。它由include/linux/irq_poll.h和lib/irq_poll.c实现。当设备中断频率超过一定阈值时，irq_poll自动将中断模式切换为轮询模式，避免频繁的硬件中断上下文切换消耗CPU时间。

irq_poll的核心数据结构：

```c
struct irq_poll {
    struct list_head list;
    unsigned long state;
    int (*poll)(struct irq_poll *, int budget);
    unsigned int irq;
};
```

state字段使用位操作管理状态，关键标志位如下：

```c
#define IRQ_POLL_F_SCHED   0
#define IRQ_POLL_F_POLLING 1
#define IRQ_POLL_F_DISABLE 2
```

初始化一个irq_poll实例使用irq_poll_init：

```c
void irq_poll_init(struct irq_poll *iop, int irq,
                   int (*poll)(struct irq_poll *, int))
{
    INIT_LIST_HEAD(&iop->list);
    iop->state = 0;
    iop->irq = irq;
    iop->poll = poll;
}
```

irq_poll的核心调度入口是irq_poll_sched，通常在中断处理handler中调用：

```c
void irq_poll_sched(struct irq_poll *iop)
{
    if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
        return;
    list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
    __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
}
```

该函数将irq_poll实例加入per-CPU链表blk_cpu_iopoll，然后触发IRQ_POLL_SOFTIRQ软中断。软中断处理函数irq_poll_softirq在软中断上下文中执行轮询回调：

```c
static void irq_poll_softirq(struct softirq_action *h)
{
    int pending = 0, budget = irq_poll_budget;
    LIST_HEAD(list);

    local_irq_disable();
    list_splice_init(this_cpu_ptr(&blk_cpu_iopoll), &list);
    local_irq_enable();

    while (!list_empty(&list)) {
        struct irq_poll *iop;
        int work_done;

        iop = list_entry(list.next, struct irq_poll, list);
        work_done = iop->poll(iop, budget);
        budget -= min(budget, work_done);
        if (work_done == budget) {
            list_move_tail(&iop->list, &list);
            pending++;
        } else {
            list_del_init(&iop->list);
            clear_bit(IRQ_POLL_F_SCHED, &iop->state);
            if (work_done > budget)
                pending++;
        }
    }

    if (pending) {
        list_splice_tail_init(&list, this_cpu_ptr(&blk_cpu_iopoll));
        __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
    }

    local_irq_disable();
    list_splice_tail_init(&list, this_cpu_ptr(&blk_cpu_iopoll));
    local_irq_enable();
}
```

轮询回调用budget参数控制本次轮询的处理上限，防止独占CPU。irq_poll_budget默认值为64，表示每次软中断最多处理64个数据包或请求。如果poll返回的工作量等于budget，说明仍有数据待处理，irq_poll_softirq将重新触发软中断继续轮询。如果工作量小于budget，说明设备已空，清除IRQ_POLL_F_SCHED标志，返回中断模式。

cap_poll是在网络驱动NAPI poll基础上的IRQ层面扩展。部分设备通过irq_poll_set_cap接口标记自身支持poll能力：

```c
void irq_poll_set_cap(struct irq_poll *iop)
{
    set_bit(IRQ_POLL_F_DISABLE, &iop->state);
}
```

将IRQ_POLL_F_DISABLE置位后，irq_poll会在特定条件下禁用自身并退回中断模式。这个自适应切换是irq_poll区别于NAPI的关键点。irq_poll通过统计中断间隔时间动态判断是否切换：

```c
static bool irq_poll_check_poll(struct irq_poll *iop, unsigned int irq_interval)
{
    if (irq_interval < irq_poll_timeout)
        return true;
    return false;
}
```

当连续中断间隔小于irq_poll_timeout（默认200微秒）时，中断频率过高，irq_poll倾向于保持在轮询模式；当间隔变大时，切换回中断模式以节省CPU。

与NAPI相比，irq_poll的差异在于：

1. irq_poll工作在软中断IRQ_POLL_SOFTIRQ中，NAPI使用NET_RX_SOFTIRQ，两者优先级不同。
2. irq_poll的poll回调在软中断上下文每次被调用时最多执行budget次，NAPI的poll在每次被调用时有固定的budget配额。
3. irq_poll不需要像NAPI那样禁用设备的RX中断，它仅通过IRQ_POLL_F_SCHED标志控制调度，中断handler依然简短地记录状态。

irq_poll与线程化中断的使用场景对比：中断频率较低时，线程化中断将处理推入进程上下文，节省CPU；中断频率极高（如万兆网卡）时，线程化导致频繁唤醒和调度，此时irq_poll在软中断中轮询更高效。

```c
static int example_irq_handler(int irq, void *dev_id)
{
    struct my_device *dev = dev_id;

    /* Quick hardware check */
    if (!readl(dev->regs + INT_STATUS))
        return IRQ_NONE;

    /* Schedule irq_poll instead of processing all data */
    irq_poll_sched(&dev->iop);
    return IRQ_HANDLED;
}

static int my_device_poll(struct irq_poll *iop, int budget)
{
    struct my_device *dev = container_of(iop, struct my_device, iop);
    int work = 0;

    while (work < budget) {
        u32 status = readl(dev->regs + RX_STATUS);

        if (!(status & RX_READY))
            break;
        /* Process one packet */
        work++;
    }

    if (work < budget) {
        /* No more data, re-enable interrupts */
        writel(INT_ENABLE, dev->regs + INT_MASK);
    }
    return work;
}
```

在这个示例中，IRQ handler仅检查状态并调用irq_poll_sched，实际数据收发在poll回调中轮询完成。这种设计将高频率中断转换为有节制的轮询，兼顾了延迟和吞吐量。