import utils.constants as const
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import itertools


def f1_score(y_true, y_pred, labels, average):
    """
    F1 score is the harmonic mean of Precision and Recall.
    The formula for F1 Score is: F1 = 2 * (precision * recall) / (precision + recall)

    :param y_true: list of Gold labels
    :param y_pred: list of predicted labels
    :param labels: Optional, list of labels for PR values
    :param average: String - (None|'MICRO'|'MACRO'), defined in utils/constants.py.
        If None, the scores for each class are returned.
        MACRO - Macro averaging: compute the F1 Score for each class and average these numbers.
        MICRO - Micro averaging: compute TP, FP, FN for each class, sum these numbers
        (aggregate TP, FP, FN) and compute the F1 Score from the aggregate TP, FP & FN.
    :return: returns a list of Result class objects. Use :func:`~eval.metrics.Result.print_result`
        to print the F1 Score on the console.
    """
    # pr_list - list of dictionaries with precision, recall, TPs, FPs and FNs for each label
    pr_list = get_precision_recall(y_true, y_pred, labels)
    if average is None or average == const.AVG_MACRO:
        f1_score_list = []
        f1_sum = 0
        for item in pr_list:
            precision = item['precision']
            recall = item['recall']
            f_score = calculate_f1_score(precision, recall)
            f1_sum += f_score
            if average is None:
                f1_score_list.append(Result(precision, recall, average, item['label'], round(f_score, 4)))
        if average is None:
            return f1_score_list
        elif average == const.AVG_MACRO:
            return [Result(None, None, average, None, round(f1_sum / len(pr_list), 4))]
    elif average == const.AVG_MICRO:
        aggregate_tp = 0
        aggregate_fp = 0
        aggregate_fn = 0
        for item in pr_list:
            aggregate_tp += item['tp']
            aggregate_fp += item['fp']
            aggregate_fn += item['fn']
        # find precision and recall for the aggregate TP, FP & FN
        agg_precision = get_precision(aggregate_tp, aggregate_fp)
        agg_recall = get_recall(aggregate_tp, aggregate_fn)
        agg_f1_score = calculate_f1_score(agg_precision, agg_recall)
        return [Result(agg_precision, agg_recall, average, None, round(agg_f1_score, 4))]
    return None
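
# Worked example (illustrative numbers only) of how the two averaging modes of f1_score above
# differ. For two classes with (tp, fp, fn) counts of (5, 2, 2) and (3, 2, 2):
#   MACRO: F1 is computed per class (~0.71 and 0.60) and the two values are averaged -> ~0.66
#   MICRO: the counts are summed first (tp=8, fp=4, fn=4), giving precision = recall = 8/12 ~ 0.67,
#          and therefore F1 ~ 0.67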

def get_precision_recall(y_true, y_pred, labels=None):
    """
    Takes Gold standard labels and Predicted labels as arguments and computes Precision and
    Recall (including TP, FP, FN) for all the labels.
    Returns a list of dictionaries with precision, recall, tp, fp, fn.

    :param y_true: list of Gold labels
    :param y_pred: list of predicted labels
    :param labels: Optional, list of labels for PR values
    :return: returns the list of dictionaries with Precision and Recall values, e.g.
        [
            {'label': 'method', 'precision': 0.71, 'recall': 0.71, 'tp': 5, 'fp': 2, 'fn': 2},
            {'label': 'background', 'precision': 0.56, 'recall': 0.49, 'tp': 3, 'fp': 2, 'fn': 2}
        ]
    """
    if len(y_true) != len(y_pred):
        raise ValueError('Length of Gold standard labels and Predicted labels must be the same')

    all_labels = False
    if labels is None or len(labels) == 0:
        # get the precision and recall for all the labels
        all_labels = True

    pr_dict = {}
    # walk over gold and predicted labels pairwise
    for gold_label, pred_label in zip(y_true, y_pred):
        # Add label entries to the dictionary, if not present yet
        if gold_label not in pr_dict:
            pr_dict[gold_label] = {'tp': 0, 'fp': 0, 'fn': 0}
        if pred_label not in pr_dict:
            pr_dict[pred_label] = {'tp': 0, 'fp': 0, 'fn': 0}

        if gold_label == pred_label:
            # predicted correctly
            pr_dict[gold_label]['tp'] += 1
        else:
            # Gold is in class, but prediction is not
            pr_dict[gold_label]['fn'] += 1
            # Prediction is in class, but Gold is not
            pr_dict[pred_label]['fp'] += 1

    pr_list = []
    if all_labels:
        labels = list(pr_dict.keys())
    for label in labels:
        tp = pr_dict[label]['tp']
        fp = pr_dict[label]['fp']
        fn = pr_dict[label]['fn']
        precision = get_precision(tp, fp)
        recall = get_recall(tp, fn)
        pr_list.append({'label': label, 'precision': precision, 'recall': recall,
                        'tp': tp, 'fp': fp, 'fn': fn})
    return pr_list
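
# Small illustrative trace (hypothetical labels) for get_precision_recall above, showing how the
# per-label counts accumulate:
#   y_true = ['method', 'background', 'method']
#   y_pred = ['method', 'method', 'background']
#   -> 'method':     tp=1, fp=1, fn=1  => precision=0.5, recall=0.5
#   -> 'background': tp=0, fp=1, fn=1  => precision=0.0, recall=0.0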

def get_precision(tp, fp):
    """
    Calculates and returns Precision.

    :param tp: Number of True Positives
    :param fp: Number of False Positives
    :return: Returns Precision value (usually a floating point number)
    """
    # guard against division by zero when the label was never predicted
    if tp + fp == 0:
        return 0.0
    return tp / (tp + fp)


def get_recall(tp, fn):
    """
    Calculates and returns Recall.

    :param tp: Number of True Positives
    :param fn: Number of False Negatives
    :return: Returns Recall value (usually a floating point number)
    """
    # guard against division by zero when the label never occurs in the gold standard
    if tp + fn == 0:
        return 0.0
    return tp / (tp + fn)


def calculate_f1_score(precision, recall):
    """
    Takes Precision and Recall as params and computes the F1 Score.
    The formula for F1 Score is: F1 = 2 * (precision * recall) / (precision + recall)

    :param precision: Precision value
    :param recall: Recall value
    :return: Returns F1 Score
    """
    # guard against division by zero when both precision and recall are zero
    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)


def get_confusion_matrix(y_true, y_pred):
    """
    Takes true labels and predicted labels as parameters and returns the Confusion Matrix.

    :param y_true: True labels
    :param y_pred: Predicted labels
    :return: returns Confusion Matrix
    """
    return confusion_matrix(y_true, y_pred, labels=const.CLASS_LABELS_LIST)


def plot_confusion_matrix(confusion_mat, classifier_name, plot_file_name):
    """
    Plots the given Confusion Matrix with per-cell counts and saves the figure to plot_file_name.

    :param confusion_mat: Confusion Matrix (as returned by get_confusion_matrix)
    :param classifier_name: Name of the classifier, used as the plot title
    :param plot_file_name: Path of the output image file
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(confusion_mat, interpolation='nearest', cmap=plt.get_cmap('Blues'))
    plt.title(classifier_name)
    plt.colorbar()
    target_names = const.CLASS_LABELS_LIST
    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # switch the cell text colour at half of the largest cell count
    thresh = confusion_mat.max() / 2
    for i, j in itertools.product(range(confusion_mat.shape[0]), range(confusion_mat.shape[1])):
        plt.text(j, i, "{:,}".format(confusion_mat[i, j]),
                 horizontalalignment="center",
                 color="white" if confusion_mat[i, j] > thresh else "black")

    plt.tight_layout(pad=1.5)
    plt.ylabel('True/Gold')
    plt.xlabel('Predicted')
    plt.savefig(plot_file_name)


class Result:
    """ Model class for carrying evaluation data (F1 Score, Precision, Recall, ...) """

    def __init__(self, precision, recall, average, label, f_score):
        self.precision = precision
        self.recall = recall
        self.average = average
        self.label = label
        self.f1_score = f_score

    def print_result(self):
        """ Prints F1 Score """
        print_line = 'F1 Score :: ' + str(self.f1_score)
        if self.label:
            print_line += ' Label :: ' + self.label
        if self.average:
            print_line += ' Average :: ' + self.average
        print(print_line)
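

# Minimal usage sketch, not part of the evaluation API itself. It assumes utils.constants defines
# AVG_MACRO, AVG_MICRO and a CLASS_LABELS_LIST that covers the example labels below; adjust the
# labels to your own schema before running.
if __name__ == '__main__':
    gold = ['method', 'background', 'method', 'result', 'background']
    pred = ['method', 'method', 'method', 'result', 'background']

    # Per-class scores
    for result in f1_score(gold, pred, labels=None, average=None):
        result.print_result()

    # Macro- and micro-averaged scores
    f1_score(gold, pred, labels=None, average=const.AVG_MACRO)[0].print_result()
    f1_score(gold, pred, labels=None, average=const.AVG_MICRO)[0].print_result()

    # Confusion matrix plot written to disk; only meaningful if CLASS_LABELS_LIST
    # matches the labels used above
    cm = get_confusion_matrix(gold, pred)
    plot_confusion_matrix(cm, 'Example classifier', 'example_confusion_matrix.png')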