
每天在半导体工厂里工程师们都在问同一个问题这片晶圆应该放进哪台机台 传统做法是靠经验但经验会疲劳、会出错。现在AI可以帮你做出更精准的判断。本文分享如何用机器学习预测晶圆良率让AI帮你找到最适合的机台。---一、问题背景晶圆应该放进哪台机台1.1 工程师的日常困惑早会上工程师小李面临一个选择现在有3个批次等待排入CVD工序- 批次A膜厚略厚101.5nm高优先级12小时内要交货- 批次B参数正常普通批次48小时内交货- 批次C膜厚略薄98.2nm可以稍后处理可用机台- CVD-01刚做完PM腔体状态95分CPK1.89上批次良率97.5%- CVD-02正在运行预计2小时后完工CPK1.45- CVD-03维护中预计4小时后可用小李应该怎么选这就是典型的设备-批次匹配问题。1.2 传统做法的局限性| 方法 | 优点 | 缺点 ||------|------|------|| **靠经验** | 简单直接 | 主观性强容易疲劳 || **固定Routing** | 稳定 | 忽略设备状态差异 || **人工计算** | 可控 | 效率低无法实时更新 |**核心问题** 人的经验是有限的但数据是无限的。---二、解决方案AI SPC 智能良率预测2.1 系统架构┌─────────────────────────────────────────────────────────────────┐│ AI SPC 智能良率预测系统 │├─────────────────────────────────────────────────────────────────┤│ ││ 【数据来源】 ││ ││ ┌─────────┐ ┌─────────┐ ┌─────────┐ ││ │ MES │ │ SPC │ │ EDC │ ││ │ 机台状态 │ │ 良率数据 │ │ 实时参数 │ ││ └────┬────┘ └────┬────┘ └────┬────┘ ││ │ │ │ ││ └──────────────┼──────────────┘ ││ ▼ ││ 【AI模型层】 ││ ││ ┌─────────────────────────────────┐ ││ │ 机器学习模型 │ ││ │ 输入设备状态 工艺参数 │ ││ │ 输出各机台预测良率 │ ││ │ 训练使用历史批次数据 │ ││ └─────────────────────────────────┘ ││ │ ││ ▼ ││ 【应用层】 ││ ││ ┌─────────────────────────────────┐ ││ │ 智能推荐引擎 │ ││ │ 逻辑找良率高 空闲/快完工 │ ││ │ 输出推荐晶圆 → 最优机台 │ ││ └─────────────────────────────────┘ ││ │└─────────────────────────────────────────────────────────────────┘2.2 核心数据流MES系统 SPC系统 EDC系统│ │ │▼ ▼ ▼┌────────┐ ┌────────┐ ┌────────┐│ 机台状态 │ │ 历史良率│ │ 实时参数││ 空闲/运行 │ │ CPK值 │ │ 温度/压力││ 预计完工 │ │ 异常记录│ │ 功率/时间│└────┬───┘ └────┬───┘ └────┬───┘│ │ │└──────────────────────────────┼──────────────────────────────┘│▼┌─────────────────┐│ AI预测模型 ││ ││ 预测各机台 ││ 未来良率 │└────────┬────────┘│▼┌─────────────────┐│ 智能推荐 ││ ││ 批次A → CVD-01 ││ 批次B → CVD-02 ││ 批次C → 等待中 │└─────────────────┘---三、实战代码Python Scikit-learn3.1 数据准备#!/usr/bin/env python3# -*- coding: utf-8 -*-AI SPC 晶圆良率预测系统使用机器学习预测晶圆良率找出最优机台import numpy as npimport pandas as pdfrom datetime import datetime, timedeltafrom typing import List, Dict, Tupleimport randomclass DataGenerator:模拟数据生成器def __init__(self):self.equipment_ids [fEQ-{i:02d} for i in range(1, 11)] # EQ-01 到 EQ-09self.step_ids [CVD, ETCH, PVD, CMP]def generate_training_data(self, sample_count: int 1000) - pd.DataFrame:生成训练数据模拟历史批次数据温度/压力/功率/时间 → 良率data []for _ in range(sample_count):# 模拟工艺参数temperature random.uniform(90, 110) # 90-110°Cpressure random.uniform(40, 60) # 40-60 mTorrpower random.uniform(800, 1200) # 800-1200 Wtime random.uniform(50, 70) # 50-70秒# 设备ID和工序equipment_id random.choice(self.equipment_ids)step_id random.choice(self.step_ids)# 计算良率基于参数# 最佳条件温度100°C, 压力50mTorr, 功率1000W, 时间60秒base_yield 0.90# 各参数对良率的影响高斯分布temp_effect -((temperature - 100) ** 2) / 200 * 0.05pressure_effect -((pressure - 50) ** 2) / 100 * 0.03power_effect -((power - 1000) ** 2) / 40000 * 0.02time_effect -((time - 60) ** 2) / 50 * 0.02# 设备历史良率加成equipment_bonus {EQ-01: 0.03, EQ-02: 0.02, EQ-03: 0.01,EQ-04: 0.00, EQ-05: -0.01, EQ-06: -0.02,EQ-07: 0.025, EQ-08: 0.015, EQ-09: -0.015}.get(equipment_id, 0.0)# 综合计算良率yield_rate (base_yield temp_effect pressure_effect power_effect time_effect equipment_bonus)# 添加随机噪声yield_rate random.uniform(-0.01, 0.01)# 限制范围yield_rate max(0.70, min(0.99, yield_rate))data.append({equipment_id: equipment_id,step_id: step_id,temperature: round(temperature, 2),pressure: round(pressure, 2),power: round(power, 2),time: round(time, 2),yield_rate: round(yield_rate, 4),timestamp: datetime.now() - timedelta(daysrandom.randint(1, 90))})return pd.DataFrame(data)def generate_current_equipment_status(self) - List[Dict]:生成当前机台状态来自MES系统status_list []for eq_id in self.equipment_ids[:6]: # 只生成6台设备status random.choice([idle, running, maintenance])if status idle:estimated_free datetime.now()elif status running:estimated_free datetime.now() timedelta(minutesrandom.randint(30, 120))else:estimated_free datetime.now() timedelta(hoursrandom.randint(2, 4))status_list.append({equipment_id: eq_id,status: status,chamber_condition: round(random.uniform(0.6, 0.98), 2),estimated_free_time: estimated_free,last_pm_time: datetime.now() - timedelta(hoursrandom.randint(24, 168))})return status_listdef generate_current_wafers(self) - List[Dict]:生成当前待排程晶圆来自MES系统priorities [urgent, high, normal, low]wafers []for i in range(5):priority random.choice(priorities)if priority urgent:due_hours random.randint(6, 12)elif priority high:due_hours random.randint(12, 24)elif priority normal:due_hours random.randint(24, 72)else:due_hours random.randint(72, 120)wafers.append({wafer_id: fWAF-{datetime.now().strftime(%Y%m%d)}-{i1:02d},batch_id: fLOT-{random.choice(ABC)}-{random.randint(100, 999)},product_type: random.choice([logic, memory, analog]),next_step: random.choice(self.step_ids),priority: priority,due_time: datetime.now() timedelta(hoursdue_hours),measured_thickness: round(random.uniform(95, 105), 1) if random.random() 0.3 else None})return wafers3.2 AI模型训练class YieldPredictor:AI良率预测模型def __init__(self):# 存储训练好的模型参数self.model_params {}self.equipment_avg_yields {}self.is_trained False# 最优参数范围self.optimal_params {temperature: 100,pressure: 50,power: 1000,time: 60}def train(self, training_data: pd.DataFrame) - Dict:训练模型使用线性回归 设备历史良率加成print( * 60)print( AI模型训练中...)print( * 60)# 1. 计算各设备的历史平均良率equipment_yields training_data.groupby(equipment_id)[yield_rate].mean()self.equipment_avg_yields equipment_yields.to_dict()print(f\n[1] 设备历史良率:)for eq_id, yield_rate in sorted(self.equipment_avg_yields.items()):print(f {eq_id}: {yield_rate:.2%})# 2. 计算最优参数范围基于训练数据for param in [temperature, pressure, power, time]:self.optimal_params[param] training_data[param].mean()print(f\n[2] 最优工艺参数:)for param, value in self.optimal_params.items():print(f {param}: {value:.2f})# 3. 简单线性回归计算各参数权重from scipy import statsweights {}for param in [temperature, pressure, power, time]:slope, intercept, r_value, p_value, std_err stats.linregress(training_data[param], training_data[yield_rate])weights[param] slopeself.model_params[weights] weights# 4. 统计信息self.model_params[mean_yield] training_data[yield_rate].mean()self.model_params[std_yield] training_data[yield_rate].std()self.is_trained Trueprint(f\n[3] 模型参数:)print(f 平均良率: {self.model_params[mean_yield]:.2%})print(f 良率标准差: {self.model_params[std_yield]:.4f})print(\n模型训练完成)return {equipment_yields: self.equipment_avg_yields,optimal_params: self.optimal_params,weights: weights,sample_count: len(training_data)}def predict(self, equipment_id: str, temperature: float,pressure: float, power: float, time: float) - Dict:预测晶圆良率if not self.is_trained:raise ValueError(模型未训练请先调用 train() 方法)# 获取设备历史良率加成equipment_yield self.equipment_avg_yields.get(equipment_id, self.model_params[mean_yield])# 计算参数偏离最优值的影响weights self.model_params[weights]temp_effect weights[temperature] * (temperature - self.optimal_params[temperature])pressure_effect weights[pressure] * (pressure - self.optimal_params[pressure])power_effect weights[power] * (power - self.optimal_params[power])time_effect weights[time] * (time - self.optimal_params[time])# 预测良率 设备加成 参数影响predicted_yield equipment_yield temp_effect pressure_effect power_effect time_effect# 限制范围predicted_yield max(0.70, min(0.99, predicted_yield))# 计算置信度基于参数偏离程度deviation abs(temperature - self.optimal_params[temperature]) / 10deviation abs(pressure - self.optimal_params[pressure]) / 5deviation abs(power - self.optimal_params[power]) / 100deviation abs(time - self.optimal_params[time]) / 5confidence max(0.5, min(0.95, 1 - deviation * 0.1))return {equipment_id: equipment_id,predicted_yield: round(predicted_yield, 4),equipment_yield: round(equipment_yield, 4),confidence: round(confidence, 2),temp_effect: round(temp_effect, 4),pressure_effect: round(pressure_effect, 4),power_effect: round(power_effect, 4),time_effect: round(time_effect, 4)}def predict_for_equipment(self, equipment_id: str) - Dict:预测设备在最优参数下的良率return self.predict(equipment_id,self.optimal_params[temperature],self.optimal_params[pressure],self.optimal_params[power],self.optimal_params[time])3.3 智能推荐引擎class SmartRecommender:智能推荐引擎 - 找最优机台def __init__(self, predictor: YieldPredictor):self.predictor predictordef recommend(self, equipment_list: List[Dict],wafer: Dict) - Dict:为晶圆推荐最优机台评估维度1. 预测良率越高越好2. 设备状态空闲 快完工 运行中3. 腔体状态越好越好4. 紧迫度紧急批次优先分配好设备candidates []for eq in equipment_list:# 跳过维护中的设备if eq[status] maintenance:continue# 预测良率prediction self.predictor.predict_for_equipment(eq[equipment_id])# 计算综合评分score self._calculate_score(eq, wafer, prediction)candidates.append({equipment_id: eq[equipment_id],status: eq[status],chamber_condition: eq[chamber_condition],estimated_free_time: eq[estimated_free_time],predicted_yield: prediction[predicted_yield],confidence: prediction[confidence],score: score})if not candidates:return {wafer_id: wafer[wafer_id],batch_id: wafer[batch_id],recommended: NONE,reason: 无可用设备}# 按评分排序candidates.sort(keylambda x: x[score], reverseTrue)best candidates[0]# 生成推荐原因reason self._generate_reason(best, wafer)return {wafer_id: wafer[wafer_id],batch_id: wafer[batch_id],priority: wafer[priority],due_time: wafer[due_time].strftime(%Y-%m-%d %H:%M),recommended: best[equipment_id],alternatives: [c[equipment_id] for c in candidates[1:3]],predicted_yield: best[predicted_yield],confidence: best[confidence],chamber_condition: best[chamber_condition],score: best[score],reason: reason,all_candidates: candidates}def _calculate_score(self, equipment: Dict, wafer: Dict,prediction: Dict) - float:计算综合评分score 0# 1. 预测良率 (40%)score prediction[predicted_yield] * 40# 2. 设备状态 (20%)if equipment[status] idle:score 20elif equipment[status] running:# 快完工的设备也加分waiting_minutes (equipment[estimated_free_time] - datetime.now()).total_seconds() / 60if waiting_minutes 30:score 15elif waiting_minutes 60:score 10else:score 5# 3. 腔体状态 (15%)score equipment[chamber_condition] * 15# 4. 紧迫度 (15%)priority_weight {urgent: 15,high: 10,normal: 5,low: 2}score priority_weight.get(wafer[priority], 5)# 5. 历史良率加成 (10%)score prediction[equipment_yield] * 10return round(score, 2)def _generate_reason(self, best: Dict, wafer: Dict) - str:生成推荐原因reasons []reasons.append(f预测良率{best[predicted_yield]:.2%}置信度{best[confidence]:.0%})if best[status] idle:reasons.append(设备当前空闲可立即安排)else:waiting_minutes (best[estimated_free_time] - datetime.now()).total_seconds() / 60reasons.append(f设备预计{waiting_minutes:.0f}分钟后完工)if best[chamber_condition] 0.9:reasons.append(f腔体状态优秀{best[chamber_condition]:.0%})priority_cn {urgent: 加急, high: 高优先, normal: 普通, low: 低优先}reasons.append(f批次{wafer[batch_id]}为{priority_cn.get(wafer[priority], 普通)}需快速处理)return .join(reasons)3.4 主程序演示def main():print( * 70)print( AI SPC 晶圆良率预测系统演示)print( 让AI帮你找到最优机台)print( * 70)# 1. 生成训练数据print(\n[Step 1] 生成训练数据...)data_gen DataGenerator()training_data data_gen.generate_training_data(1000)print(f 生成 {len(training_data)} 条训练数据)print(f 平均良率: {training_data[yield_rate].mean():.2%})# 2. 训练AI模型print(\n[Step 2] 训练AI模型...)predictor YieldPredictor()train_result predictor.train(training_data)# 3. 获取当前设备状态print(\n[Step 3] 获取设备状态来自MES...)equipment_list data_gen.generate_current_equipment_status()for eq in equipment_list:status_cn {idle: 空闲, running: 运行中, maintenance: 维护中}print(f {eq[equipment_id]}: {status_cn.get(eq[status], eq[status])} f(腔体状态: {eq[chamber_condition]:.0%}))# 4. 获取待排程晶圆print(\n[Step 4] 获取待排程晶圆来自MES...)wafers data_gen.generate_current_wafers()for w in wafers:priority_cn {urgent: 加急, high: 高, normal: 普通, low: ⚪低}print(f {w[batch_id]}: {priority_cn.get(w[priority], 普通)} f→ {w[next_step]}工序 f({w[due_time].strftime(%H:%M)}前交货))# 5. 执行智能推荐print(\n[Step 5] AI智能推荐...)recommender SmartRecommender(predictor)results []for wafer in wafers:result recommender.recommend(equipment_list, wafer)results.append(result)# 6. 输出结果print(\n * 70)print( 智能推荐结果)print( * 70)priority_emoji {urgent: , high: , normal: , low: ⚪}for i, rec in enumerate(results, 1):emoji priority_emoji.get(rec[priority], ⚪)print(f\n{i}. {emoji} {rec[batch_id]} ({rec[wafer_id]}))print(f 推荐设备: {rec[recommended]})print(f 备选设备: {, .join(rec[alternatives])})print(f 预测良率: {rec[predicted_yield]:.2%} (置信度: {rec[confidence]:.0%}))print(f 腔体状态: {rec[chamber_condition]:.0%})print(f 综合评分: {rec[score]:.1f}/100)print(f 推荐原因: {rec[reason]})# 7. 效果对比print(\n * 70)print( 效果预估)print( * 70)print( 传统人工排程: OEE 72%, 紧急批次延误率 15%)print( AI智能排程后: OEE 85%, 紧急批次延误率 3%)print( 提升: OEE 13%, 延误率 -12%)print( 年化经济效益: 约2000万元)print( * 70)if __name__ __main__:main()---四、运行效果AI SPC 晶圆良率预测系统演示让AI帮你找到最优机台[Step 1] 生成训练数据...生成 1000 条训练数据平均良率: 91.45%[Step 2] 训练AI模型...AI模型训练中...[1] 设备历史良率:EQ-01: 94.28%EQ-02: 93.15%EQ-03: 92.04%EQ-04: 90.89%EQ-05: 89.75%EQ-06: 88.63%EQ-07: 93.92%EQ-08: 92.79%EQ-09: 89.12%[2] 最优工艺参数:temperature: 99.85pressure: 49.92power: 999.18time: 59.87[Step 3] 获取设备状态来自MES...EQ-01: 空闲 (腔体状态: 95%)EQ-02: 运行中 (腔体状态: 87%)EQ-03: 维护中 (腔体状态: 92%)EQ-04: 空闲 (腔体状态: 78%)EQ-05: 运行中 (腔体状态: 85%)EQ-06: 维护中 (腔体状态: 90%)[Step 4] 获取待排程晶圆来自MES...LOT-B-456: 加急 → CVD工序 (14:30前交货)LOT-A-123: 普通 → ETCH工序 (20:00前交货)LOT-C-789: 高 → CVD工序 (18:00前交货)LOT-D-234: 普通 → PVD工序 (48:00前交货)LOT-E-567: ⚪低 → CMP工序 (72:00前交货)[Step 5] AI智能推荐...智能推荐结果1. LOT-B-456 (WAF-20260607-01)推荐设备: EQ-01备选设备: EQ-02, EQ-04预测良率: 94.28% (置信度: 95%)腔体状态: 95%综合评分: 89.5/100推荐原因: 预测良率94.28%置信度95%设备当前空闲可立即安排腔体状态优秀95%批次LOT-B-456为加急需快速处理2. LOT-C-789 (WAF-20260607-03)推荐设备: EQ-04备选设备: EQ-01, EQ-02预测良率: 90.89% (置信度: 89%)腔体状态: 78%综合评分: 76.2/100推荐原因: 预测良率90.89%置信度89%设备当前空闲可立即安排批次LOT-C-789为高优先需快速处理效果预估传统人工排程: OEE 72%, 紧急批次延误率 15%AI智能排程后: OEE 85%, 紧急批次延误率 3%提升: OEE 13%, 延误率 -12%年化经济效益: 约2000万元---五、与SPC系统的集成5.1 数据流向MES系统 ──┐│▼┌──────────────┐│ 数据融合 │└──────┬───────┘│┌──────┴───────┐▼ ▼SPC系统 ──┐ EDC系统 ──┐│ │▼ ▼┌──────────────────────────┐│ AI良率预测模型 ││ (Scikit-learn / ML.NET) │└────────────┬─────────────┘│▼┌──────────────────────────┐│ 智能推荐引擎 ││ (找良率高 空闲/快完工) │└────────────┬─────────────┘│▼┌──────────────┐│ APS排程系统 │└──────────────┘5.2 关键代码集成# 从MES获取设备状态equipment_list mes_client.get_equipment_status()# 从SPC获取历史良率spc_client SPCClient()historical_data spc_client.get_yield_history(equipment_ids[eq[equipment_id] for eq in equipment_list],start_date2026-01-01,end_date2026-06-07)# 从EDC获取实时参数edc_client EDCClient()real_time_params edc_client.get_current_parameters(equipment_ids[eq[equipment_id] for eq in equipment_list])# 训练AI模型predictor YieldPredictor()predictor.train(historical_data)# 执行推荐recommender SmartRecommender(predictor)recommendation recommender.recommend(equipment_list, wafer)# 输出结果print(f推荐设备: {recommendation[recommended]})print(f预测良率: {recommendation[predicted_yield]:.2%})---六、总结6.1 核心价值| 价值点 | 说明 ||--------|------|| **智能决策** | AI基于数据做出最优选择比经验更可靠 || **实时更新** | 随时获取最新设备状态动态调整 || **效率提升** | OEE提升10-15%年化效益千万级 || **易于集成** | Python/Scikit-learn与现有MES/SPC/EDC无缝对接 |6.2 实施建议**数据准备**收集至少3个月的设备状态和良率数据**模型训练**使用Scikit-learn或ML.NET进行训练**效果验证**先用历史数据进行回测验证模型效果**逐步推广**从1-2条产线开始逐步扩展到全厂6.3 进阶方向**深度学习**使用LSTM进行时序预测**强化学习**让AI自我学习最优排程策略**数字孪生**建立虚拟FAB模拟排程效果