[ PROMPT_NODE_27358 ]
quick_reference
[ SKILL_DOCUMENTATION ]
# Scikit-learn 快速参考
## 常用导入模式
python
# 核心 scikit-learn
import sklearn
# 数据拆分与交叉验证
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
# 预处理
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
# 特征选择
from sklearn.feature_selection import SelectKBest, RFE
# 监督学习
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier
# 无监督学习
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.decomposition import PCA, NMF
# 指标
from sklearn.metrics import (
accuracy_score, precision_score, recall_score, f1_score,
mean_squared_error, r2_score, confusion_matrix, classification_report
)
# 管道
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer
# 工具
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
## 安装
bash
# Install with uv (recommended)
uv pip install scikit-learn
# Optional dependencies
# scikit-learn publishes no "plots" extra; its plotting utilities
# (plot_* functions and *Display classes) only require Matplotlib.
uv pip install matplotlib # for the plotting utilities
uv pip install pandas numpy matplotlib seaborn # common companion libraries
## 快速工作流模板
### 分类工作流
python
# Classification template: split, scale, fit a random forest, then report.
# Expects a feature matrix `X` and a label vector `y` to be defined beforehand.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Hold out 20% of the samples as a stratified (stratify=y), seeded test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
# Fit the scaler on the training split only, then reuse it on the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Train a 100-tree random forest with a fixed seed.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)
# Score the held-out split: per-class report first, then the confusion matrix.
y_pred = model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
### 回归工作流
python
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_sco