from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
def select_model(df, features):
all_X = df[features]
all_y = df[“Survived”]
models = [
{
"name": "LogisticRegression",
"estimator": LogisticRegression(),
"hyperparameters":
{
"solver": ['newton-cg','lbfgs','liblinear']
}
},
{
"name": "KNeighborsClassifier",
"estimator": KNeighborsClassifier(),
"hyperparameters":
{
"n_neighbors": range(1,20,2),
"weights": ["distance", "uniform"],
"algorithm": ["ball_tree", "kd_tree", "brute"],
"p": [1,2]
}
},
{
"name": "RandomForestClassifier",
"estimator": RandomForestClassifier(),
"hyperparameters":
{
"n_estimators": [4,6,9],
"criterion": ["entropy", "gini"],
"max_depth": [2,5,10],
"max_features": ['log2','sqrt'],
"min_samples_leaf": [1,5,8],
'min_samples_split': [2,3,5]
}
}
]
for model in models:
print(model['name'])
grid = GridSearchCV(model['estimator'],
param_grid = model['hyperparameters'],
cv=10)
grid.fit(all_X, all_y)
model['best_params'] = grid.best_params_
model['best_score'] = grid.best_score_
model['best_estimator'] = grid.best_estimator_
print('Best Score: {}'.format(model['best_score']))
print('Best Parameters: {}'.format(model['best_params']))
return models
def save_submission_file(model,cols,filename=‘submission.csv’):
holdout_data = holdout[cols]
predictions = model.predict(holdout_data)
submission = pd.DataFrame({'PassengerId': holdout['PassengerId'],
'Survived': predictions})
submission.to_csv(filename, index=False)
model = select_model(train, cols)
submission = save_submission_file(model[2], cols)
I expected the model to make predictions on the holdout data.
Instead, I received the error:
'dict' object has no attribute 'predict'
How can I fix this error?