Cesare
0
Q:

How to improve the accuracy of a random forest classifier?

def display(results):
    """Print a summary of a fitted hyperparameter search.

    Shows the best parameter combination, then one line per candidate
    with its mean cross-validated test score and standard deviation.

    Parameters
    ----------
    results : fitted search object (e.g. ``GridSearchCV``) exposing
        ``best_params_`` and ``cv_results_``.
    """
    print(f'Best parameters are: {results.best_params_}')
    print("\n")
    mean_scores = results.cv_results_['mean_test_score']
    std_scores = results.cv_results_['std_test_score']
    # Distinct name so the loop variable below does not shadow the list
    # being iterated (the original reused `params` for both).
    param_sets = results.cv_results_['params']
    for mean, std, params in zip(mean_scores, std_scores, param_sets):
        print(f'{round(mean, 3)} + or -{round(std, 3)} for the {params}')
0
# Exhaustively evaluate every combination in the `parameters` grid for the
# classifier `rfc`, scoring each candidate with 5-fold cross-validation.
# NOTE(review): assumes `rfc`, `parameters`, `train_features`, and
# `train_label` are defined earlier in the notebook — confirm.
from sklearn.model_selection import GridSearchCV
cv = GridSearchCV(rfc,parameters,cv=5)
# .values.ravel() flattens the single-column label DataFrame into the
# 1-D array that fit() expects (avoids the sklearn column-vector warning).
cv.fit(train_features,train_label.values.ravel())
0
# Find the number of features needed to reach 95% cumulative importance.
# np.where returns the indices where the condition first holds; add 1
# because Python is zero-indexed.
print('Number of features for 95% importance:',
      np.where(cumulative_importances > 0.95)[0][0] + 1)
# Console output from the original run:
# Number of features for 95% importance: 6
0
# Feature names and importances sorted from most to least important.
# NOTE(review): assumes `feature_importances` is a sequence of
# (name, importance) pairs and `x_values` / `plt` are defined earlier — confirm.
sorted_importances = [importance[1] for importance in feature_importances]
sorted_features = [importance[0] for importance in feature_importances]

# Running total of importance across the ranked features.
cumulative_importances = np.cumsum(sorted_importances)

# Line graph of cumulative importance.
plt.plot(x_values, cumulative_importances, 'g-')

# Dashed horizontal line marking the 95%-of-importance threshold.
plt.hlines(y=0.95, xmin=0, xmax=len(sorted_importances),
           color='r', linestyles='dashed')

# Label each x tick with its feature name.
plt.xticks(x_values, sorted_features, rotation='vertical')

# Axis labels and title.
plt.xlabel('Variable')
plt.ylabel('Cumulative Importance')
plt.title('Cumulative Importances')
0

New to Communities?

Join the community