在Python机器学习中,模型调参是一个非常重要的步骤,它可以帮助我们优化模型的性能。以下是一些常用的模型调参技巧:
# Tip: set n_jobs=-1 to parallelize the search across all CPU cores.
# (This sentence was originally fused onto the import line, which made the
# script a syntax error — moved into a comment.)
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Exhaustive grid search: evaluates every combination in the grid
# (3 * 4 * 3 = 36 candidates), each with 5-fold cross-validation.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
grid_search = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_grid, cv=5, n_jobs=-1)
# NOTE(review): X_train / y_train are assumed to be defined earlier — confirm.
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Randomized search: samples 100 parameter settings from the distributions
# below instead of enumerating every combination, which scales much better
# to large search spaces.
param_dist = {
    'n_estimators': randint(100, 500),
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': randint(2, 11),
}
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)
print(random_search.best_params_)
from skopt import BayesSearchCV
from skopt.space import Real, Integer

# Bayesian optimization: models the score surface from past evaluations and
# chooses the next candidate accordingly (50 evaluations, 5-fold CV each).
search_spaces = {
    'n_estimators': Integer(100, 500),
    'max_depth': Integer(10, 30),
    'min_samples_split': Integer(2, 10),
}
bayes_search = BayesSearchCV(
    estimator=RandomForestClassifier(),
    search_spaces=search_spaces,
    n_iter=50,
    cv=5,
    n_jobs=-1,
)
bayes_search.fit(X_train, y_train)
print(bayes_search.best_params_)
import optuna
# BUGFIX: cross_val_score was only imported further down in the article,
# after study.optimize() already calls it — that ordering raises NameError.
from sklearn.model_selection import cross_val_score


def objective(trial):
    """Optuna objective: sample hyper-parameters and return mean CV accuracy."""
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_categorical('max_depth', [None, 10, 20, 30]),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10)
    }
    model = RandomForestClassifier(**params)
    # NOTE(review): X_train / y_train are assumed to be in scope — confirm.
    score = cross_val_score(model, X_train, y_train, cv=5).mean()
    return score


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print(study.best_params)
from sklearn.decomposition import PCA
# Dimensionality reduction before retraining: keep enough principal
# components to explain 95% of the variance.
pca = PCA(n_components=0.95) # retain 95% of the variance
X_train_pca = pca.fit_transform(X_train)
# Apply the transform fitted on the training set to the test set (no refit).
X_test_pca = pca.transform(X_test)
# Retrain the model on the reduced data.
# NOTE(review): `model` is not defined at this point in the script — it must
# come from an earlier snippet; confirm before running this fragment.
model.fit(X_train_pca, y_train)
from sklearn.model_selection import learning_curve, validation_curve

# Diagnose bias/variance: plot training vs. cross-validation accuracy as a
# function of training-set size, with a +/- one-std band around each curve.
# NOTE(review): np / plt / model / X / y are assumed to be in scope.
train_sizes, train_scores, test_scores = learning_curve(
    model, X, y, cv=5, scoring='accuracy'
)
mean_train = np.mean(train_scores, axis=1)
std_train = np.std(train_scores, axis=1)
mean_test = np.mean(test_scores, axis=1)
std_test = np.std(test_scores, axis=1)

plt.plot(train_sizes, mean_train, label='Training score')
plt.plot(train_sizes, mean_test, label='Cross-validation score')
plt.fill_between(train_sizes, mean_train - std_train, mean_train + std_train, alpha=0.1)
plt.fill_between(train_sizes, mean_test - std_test, mean_test + std_test, alpha=0.1)
plt.xlabel('Training examples')
plt.ylabel('Accuracy')
plt.legend(loc='best')
plt.show()
from sklearn.model_selection import cross_val_score

# Manual coarse search over n_estimators with a simple early-stopping rule:
# stop as soon as adding more trees no longer improves the CV accuracy.
# Note: best_model holds an *unfitted* estimator with the best setting.
best_score = 0
best_model = None
for n_estimators in range(100, 1000, 100):
    model = RandomForestClassifier(n_estimators=n_estimators)
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    mean_score = np.mean(scores)
    if mean_score <= best_score:
        break  # early stopping: no improvement over the previous setting
    best_score = mean_score
    best_model = model
print(best_model)
通过结合这些技巧,你可以更有效地进行模型调参,从而提升模型的性能。
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。