From 1e470f829e2454709cbd7bee3ec01e579587ff7f Mon Sep 17 00:00:00 2001
From: Pavan Mandava
Date: Mon, 27 Apr 2020 14:32:32 +0200
Subject: [PATCH] Finished Macro Averaging and average = None case, added some
 testing code

---
 eval/metrics.py         | 114 ++++++++++++++++++++++++++++++++++++++--
 testing/eval_testing.py |   8 +++
 utils/__init__.py       |   0
 utils/constants.py      |   2 +
 4 files changed, 120 insertions(+), 4 deletions(-)
 create mode 100644 utils/__init__.py
 create mode 100644 utils/constants.py

diff --git a/eval/metrics.py b/eval/metrics.py
index 5ddcb15..4fb1bdd 100644
--- a/eval/metrics.py
+++ b/eval/metrics.py
@@ -1,15 +1,121 @@
+import utils.constants as const
+
+
 def f1_score(y_true, y_pred, labels, average):
-    return 0
+    if average is None or average == const.AVG_MACRO:
+        pr_list = get_precision_recall(y_true, y_pred, labels)
+        f1_score_list = []
+        f1_sum = 0
+        for item in pr_list:
+            precision = item['precision']
+            recall = item['recall']
+            f_score = calculate_f1_score(precision, recall)
+            f1_sum += f_score
+            if average is None:
+                f1_score_list.append(Result(precision, recall, average, item['label'], f_score))
+
+        if average is None:
+            return f1_score_list
+        elif average == const.AVG_MACRO:
+            return [Result(None, None, average, None, f1_sum / len(pr_list))]
+
+    elif average == const.AVG_MICRO:
+        pass
+
+    return None
+
+
+def get_precision_recall(y_true, y_pred, labels=None):
+    """
+    This method takes gold-standard labels and predicted labels as arguments
+    and computes precision and recall for all the labels (including TP, FP, FN).
+
+    Returns a list of dictionaries with precision, recall, tp, fp, fn.
+
+    :param y_true: list of gold labels
+    :param y_pred: list of predicted labels
+    :param labels: optional, list of labels for which precision and recall are computed;
+    :return: list of dictionaries with precision and recall values, e.g.
+        [
+            {'label': 'method', 'precision': 0.71, 'recall': 0.71, 'tp': 5, 'fp': 2, 'fn': 2},
+            {'label': 'background', 'precision': 0.56, 'recall': 0.49, 'tp': 3, 'fp': 2, 'fn': 2}
+        ]
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError('Length of gold standard labels and predicted labels must be the same')
+
+    all_labels = False
+    if labels is None or len(labels) == 0:
+        # compute precision and recall for all the labels
+        all_labels = True
+
+    pr_dict = {}
+
+    gold_iter = iter(y_true)
+    pred_iter = iter(y_pred)
+
+    while True:
+        gold_label = next(gold_iter, None)
+        pred_label = next(pred_iter, None)
+
+        # stop once either iterator is exhausted
+        if gold_label is None or pred_label is None:
+            break
+
+        # add a label entry to the dictionary, if not yet present
+        if gold_label not in pr_dict:
+            pr_dict[gold_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        # add a label entry to the dictionary, if not yet present
+        if pred_label not in pr_dict:
+            pr_dict[pred_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        if gold_label == pred_label:
+            # predicted correctly
+            pr_dict[gold_label]['tp'] += 1
+        else:
+            # gold label was missed: false negative for the gold label
+            pr_dict[gold_label]['fn'] += 1
+            # prediction does not match the gold label: false positive for the predicted label
+            pr_dict[pred_label]['fp'] += 1
+    # end while
+
+    pr_list = []
+
+    if all_labels:
+        labels = list(pr_dict.keys())
+
+    for label in labels:
+        tp = pr_dict[label]['tp']
+        fp = pr_dict[label]['fp']
+        fn = pr_dict[label]['fn']
+        precision = get_precision(tp, fp)
+        recall = get_recall(tp, fn)
+        pr_list.append({'label': label, 'precision': precision, 'recall': recall, 'tp': tp, 'fp': fp, 'fn': fn})
+
+    return pr_list
+
+
+def get_precision(tp, fp):
+    return tp / (tp + fp)
+
+
+def get_recall(tp, fn):
+    return tp / (tp + fn)
+
+
+def calculate_f1_score(precision, recall):
+    return 2 * (precision * recall) / (precision + recall)
 
 
 class Result:
-    def __init__(self, precision, recall, average, label):
+    def __init__(self, precision, recall, average, label, f_score):
         self.precision = precision
         self.recall = recall
         self.average = average
         self.label = label
-        self.f1_score = 2 * (precision * recall) / (precision + recall)
+        self.f1_score = f_score
 
     def print_result(self):
-        print('F1 Score :: ',self.f1_score)
+        print('F1 Score :: ', self.f1_score, ' Label :: ', self.label)
 
diff --git a/testing/eval_testing.py b/testing/eval_testing.py
index 869c0cc..28b112d 100644
--- a/testing/eval_testing.py
+++ b/testing/eval_testing.py
@@ -1,2 +1,10 @@
 from eval.metrics import f1_score
+import utils.constants as const
 
+y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
+y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']
+
+result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MACRO)
+
+for result in result_list:
+    result.print_result()
\ No newline at end of file
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/constants.py b/utils/constants.py
new file mode 100644
index 0000000..7941faf
--- /dev/null
+++ b/utils/constants.py
@@ -0,0 +1,2 @@
+AVG_MICRO = 'MICRO'
+AVG_MACRO = 'MACRO'
\ No newline at end of file
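
Note (not part of the patch above): the AVG_MICRO branch of f1_score is still a pass placeholder. A minimal sketch of one way it could be filled in, assuming micro averaging should pool the tp/fp/fn counts over all labels before computing a single score; micro_f1 is a hypothetical helper name, and it only reuses get_precision_recall, get_precision, get_recall and calculate_f1_score from eval/metrics.py:

def micro_f1(y_true, y_pred, labels=None):
    # Sketch only: pool the raw counts across labels, then compute one
    # precision/recall pair and a single F1 from the pooled counts.
    pr_list = get_precision_recall(y_true, y_pred, labels)
    tp = sum(item['tp'] for item in pr_list)
    fp = sum(item['fp'] for item in pr_list)
    fn = sum(item['fn'] for item in pr_list)
    precision = get_precision(tp, fp)
    recall = get_recall(tp, fn)
    return calculate_f1_score(precision, recall)

For the example lists in testing/eval_testing.py this pools tp=5, fp=3, fn=3, giving precision = recall = F1 = 0.625; with exactly one label per item, micro F1 reduces to plain accuracy (5 of 8 correct).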