[ PROMPT_NODE_22332 ]
hyperparameter-tuning
[ SKILL_DOCUMENTATION ]
# 使用 PyTorch Lightning 进行超参数调优
## 与调优框架的集成
Lightning 与流行的超参数调优库无缝集成。
### 1. Ray Tune 集成
**安装**:
bash
pip install "ray[tune]"
pip install lightning
**Ray Tune 基础示例**:
python
import lightning as L
import torch
from ray import tune
from ray.tune.integration.pytorch_lightning import TuneReportCallback
from torch import nn
class LitModel(L.LightningModule):
    """Small MLP whose learning rate and batch size are supplied by the tuner.

    Args:
        lr: Learning rate handed to Adam in ``configure_optimizers``.
        batch_size: Stored on the module; nothing in this class reads it back.
    """

    def __init__(self, lr, batch_size):
        super().__init__()
        self.lr = lr
        self.batch_size = batch_size
        # 10 -> 128 -> 1 feed-forward stack, built in network order.
        layers = [nn.Linear(10, 128), nn.ReLU(), nn.Linear(128, 1)]
        self.model = nn.Sequential(*layers)

    def _mean_output(self, batch):
        """Run ``batch`` through the network and average the outputs."""
        predictions = self.model(batch)
        return predictions.mean()

    def training_step(self, batch, batch_idx):
        """Log the mean network output as ``train_loss`` and return it."""
        train_loss = self._mean_output(batch)
        self.log('train_loss', train_loss)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Log the mean network output as ``val_loss``; returns nothing."""
        self.log('val_loss', self._mean_output(batch))

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters, using ``self.lr``."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer
def train_fn(config):
    """Train one ``LitModel`` trial on behalf of Ray Tune.

    Args:
        config: Trial configuration; the ``"lr"`` and ``"batch_size"`` entries
            are read and passed to ``LitModel``.
    """
    # NOTE(review): batch_size is only stored on the model here; the loaders
    # below are not visibly built from it — confirm it actually takes effect.
    model = LitModel(lr=config["lr"], batch_size=config["batch_size"])

    # After every validation run, report Lightning's "val_loss" to Tune
    # under the name "loss".
    report_to_tune = TuneReportCallback({"loss": "val_loss"}, on="validation_end")
    trainer = L.Trainer(max_epochs=10, callbacks=[report_to_tune])

    # NOTE(review): train_loader / val_loader are not defined in this snippet;
    # they must be supplied by the surrounding code.
    trainer.fit(model, train_loader, val_loader)
# Search space: learning rate drawn with tune.loguniform, batch size picked
# from a fixed list of candidates.
config = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "batch_size": tune.choice([16, 32, 64, 128]),
}

# Launch the hyperparameter search: 20 trials, one GPU requested per trial.
analysis = tune.run(
    train_fn,
    resources_per_trial={"gpu": 1},
    num_samples=20,
    config=config,
)

# Pick the configuration whose reported "loss" is lowest and print it.
best_config = analysis.get_best_config(mode="min", metric="loss")
print(f"最佳配置: {best_config}")
**高级:基于种群的训练 (PBT)**:
python
from ray.tune.schedulers import PopulationBasedTraining
# PBT scheduler. `metric` must be the name under which results reach Tune:
# train_fn's TuneReportCallback({"loss": "val_loss"}, ...) reports Lightning's
# "val_loss" to Tune as "loss", so the scheduler has to track "loss".
scheduler = PopulationBasedTraining(
    time_attr='training_iteration',
    metric='loss',
    mode='min',
    perturbation_interval=5,  # perturb every 5 reported training iterations
    hyperparam_mutations={
        "lr": tune.loguniform(1e-5, 1e-1),
        "batch_size": [16, 32, 64, 128],
    },
)

# NOTE(review): PBT works by cloning checkpoints of well-performing trials;
# train_fn as written only reports metrics — confirm whether a checkpointing
# callback is needed for exploitation to carry over trained weights.
analysis = tune.run(
    train_fn,
    config=config,
    num_samples=8,  # population size
    scheduler=scheduler,
    resources_per_trial={"gpu": 1},
)
### 2. Optuna 集成
**安装**:
bash
pip install optuna
pip install optuna-integratio