Evaluate classification models using F1 score.
F1 Score
Evaluation metric for classification algorithms
- F1 score combines precision and recall relative to a specific positive class.
- The F1 score can be interpreted as a weighted average of precision and recall, where an F1 score reaches its best value at 1 and its worst at 0.
- F1 Score Documentation
In [28]:
# FORMULA
# F1 = 2 * (precision * recall) / (precision + recall)
In [8]:
# imports
import pandas as pd
# load the raw dataset into X (features and target are still mixed together here)
# NOTE(review): assumes 'titanic_data.csv' exists in the current working directory — confirm
path = 'titanic_data.csv'
X = pd.read_csv(path)
# peek at the first row to sanity-check the load
X.head(1)
Out[8]:
In [9]:
# only keep numeric columns in the feature matrix.
# FIX: the original used the private pandas API `X._get_numeric_data()`, which is
# not part of the public interface and may break between pandas versions;
# `select_dtypes(include='number')` is the supported equivalent for this dataset.
X = X.select_dtypes(include='number')
# peek at the first row to confirm the column filtering
X.head(1)
Out[9]:
In [11]:
# build the response vector y (a Series) from the target column
y = X['Survived']
# show the first few labels as a sanity check
y.head(3)
Out[11]:
In [21]:
# remove the target ('Survived') from the feature matrix, and drop 'Age'
# because it contains NaN values in some rows — both in a single in-place call
X.drop(['Survived', 'Age'], axis=1, inplace=True)
In [22]:
# check delete: confirm 'Survived' and 'Age' no longer appear among the columns
X.head()
Out[22]:
In [23]:
# imports for classifiers and metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import f1_score
In [24]:
# train/test split with a fixed seed for reproducibility.
# FIX: `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; the supported module is `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
In [25]:
# Decision Tree Classifier: fit on the training split, score F1 on the test split.
# instantiate
dtc = DecisionTreeClassifier()
# fit
dtc.fit(X_train, y_train)
# predict
y_pred = dtc.predict(X_test)
# f1 score
# FIX: sklearn's signature is f1_score(y_true, y_pred) — the original passed the
# arguments swapped. Binary F1 happens to be symmetric in its two inputs, so the
# value is unchanged, but the conventional order avoids bugs with other metrics.
score = f1_score(y_test, y_pred)
# print — function-call form works under both Python 2 and 3 for a single argument
print("Decision Tree F1 score: {:.2f}".format(score))
In [27]:
# Gaussian Naive Bayes: fit on the training split, score F1 on the test split.
# instantiate
gnb = GaussianNB()
# fit
gnb.fit(X_train, y_train)
# predict
y_pred_2 = gnb.predict(X_test)
# f1 score
# FIX: sklearn's signature is f1_score(y_true, y_pred) — the original passed the
# arguments swapped (binary F1 is symmetric, so the value is unchanged).
score_2 = f1_score(y_test, y_pred_2)
# print — format spec normalized from '{: .2f}' to '{:.2f}' for consistency with
# the Decision Tree cell; function-call print works under both Python 2 and 3
print("GaussianNB F1 score: {:.2f}".format(score_2))