From 1e470f829e2454709cbd7bee3ec01e579587ff7f Mon Sep 17 00:00:00 2001
From: Pavan Mandava
Date: Mon, 27 Apr 2020 14:32:32 +0200
Subject: [PATCH] Finished Macro Averaging and average = None case, added some
 testing code

---
 eval/metrics.py         | 114 ++++++++++++++++++++++++++++++++++++++--
 testing/eval_testing.py |   8 +++
 utils/__init__.py       |   0
 utils/constants.py      |   2 +
 4 files changed, 120 insertions(+), 4 deletions(-)
 create mode 100644 utils/__init__.py
 create mode 100644 utils/constants.py

diff --git a/eval/metrics.py b/eval/metrics.py
index 5ddcb15..4fb1bdd 100644
--- a/eval/metrics.py
+++ b/eval/metrics.py
@@ -1,15 +1,121 @@
+import utils.constants as const
+
+
 def f1_score(y_true, y_pred, labels, average):
-    return 0
+    if average is None or average == const.AVG_MACRO:
+        pr_list = get_precision_recall(y_true, y_pred, labels)
+        f1_score_list = []
+        f1_sum = 0
+        for item in pr_list:
+            precision = item['precision']
+            recall = item['recall']
+            f_score = calculate_f1_score(precision, recall)
+            f1_sum += f_score
+            if average is None:
+                f1_score_list.append(Result(precision, recall, average, item['label'], f_score))
+
+        if average is None:
+            return f1_score_list
+        elif average == const.AVG_MACRO:
+            return [Result(None, None, average, None, f1_sum / len(pr_list))]
+
+    elif average == const.AVG_MICRO:
+        pass
+
+    return None
+
+
+def get_precision_recall(y_true, y_pred, labels=None):
+    """
+    This method takes gold-standard labels and predicted labels as arguments
+    and computes precision and recall for all the labels (including TP, FP, FN).
+
+    Returns a list of dictionaries with precision, recall, tp, fp, fn.
+
+    :param y_true: list of gold labels
+    :param y_pred: list of predicted labels
+    :param labels: optional, list of labels for which precision and recall are computed;
+    :return: list of dictionaries with precision and recall values, e.g.
+        [
+            {'label': 'method', 'precision': 0.71, 'recall': 0.71, 'tp': 5, 'fp': 2, 'fn': 2},
+            {'label': 'background', 'precision': 0.56, 'recall': 0.49, 'tp': 3, 'fp': 2, 'fn': 2}
+        ]
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError('Length of gold standard labels and predicted labels must be the same')
+
+    all_labels = False
+    if labels is None or len(labels) == 0:
+        # compute precision and recall for all the labels
+        all_labels = True
+
+    pr_dict = {}
+
+    gold_iter = iter(y_true)
+    pred_iter = iter(y_pred)
+
+    while True:
+        gold_label = next(gold_iter, None)
+        pred_label = next(pred_iter, None)
+
+        # stop once either iterator is exhausted
+        if gold_label is None or pred_label is None:
+            break
+
+        # add a label entry to the dictionary, if not yet present
+        if gold_label not in pr_dict:
+            pr_dict[gold_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        # add a label entry to the dictionary, if not yet present
+        if pred_label not in pr_dict:
+            pr_dict[pred_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        if gold_label == pred_label:
+            # predicted correctly
+            pr_dict[gold_label]['tp'] += 1
+        else:
+            # gold label was missed: false negative for the gold label
+            pr_dict[gold_label]['fn'] += 1
+            # prediction does not match the gold label: false positive for the predicted label
+            pr_dict[pred_label]['fp'] += 1
+    # end while
+
+    pr_list = []
+
+    if all_labels:
+        labels = list(pr_dict.keys())
+
+    for label in labels:
+        tp = pr_dict[label]['tp']
+        fp = pr_dict[label]['fp']
+        fn = pr_dict[label]['fn']
+        precision = get_precision(tp, fp)
+        recall = get_recall(tp, fn)
+        pr_list.append({'label': label, 'precision': precision, 'recall': recall, 'tp': tp, 'fp': fp, 'fn': fn})
+
+    return pr_list
+
+
+def get_precision(tp, fp):
+    return tp / (tp + fp)
+
+
+def get_recall(tp, fn):
+    return tp / (tp + fn)
+
+
+def calculate_f1_score(precision, recall):
+    return 2 * (precision * recall) / (precision + recall)
 
 
 class Result:
-    def __init__(self, precision, recall, average, label):
+    def __init__(self, precision, recall, average, label, f_score):
         self.precision = precision
         self.recall = recall
         self.average = average
         self.label = label
-        self.f1_score = 2 * (precision * recall) / (precision + recall)
+        self.f1_score = f_score
 
     def print_result(self):
-        print('F1 Score :: ',self.f1_score)
+        print('F1 Score :: ', self.f1_score, ' Label :: ', self.label)
 
diff --git a/testing/eval_testing.py b/testing/eval_testing.py
index 869c0cc..28b112d 100644
--- a/testing/eval_testing.py
+++ b/testing/eval_testing.py
@@ -1,2 +1,10 @@
 from eval.metrics import f1_score
+import utils.constants as const
 
+y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
+y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']
+
+result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MACRO)
+
+for result in result_list:
+    result.print_result()
\ No newline at end of file
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/constants.py b/utils/constants.py
new file mode 100644
index 0000000..7941faf
--- /dev/null
+++ b/utils/constants.py
@@ -0,0 +1,2 @@
+AVG_MICRO = 'MICRO'
+AVG_MACRO = 'MACRO'
\ No newline at end of file
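
Note (not part of the patch above): the AVG_MICRO branch of f1_score is still a pass placeholder. A minimal sketch of one way it could be filled in, assuming micro averaging should pool the tp/fp/fn counts over all labels before computing a single score; micro_f1 is a hypothetical helper name, and it only reuses get_precision_recall, get_precision, get_recall and calculate_f1_score from eval/metrics.py:

def micro_f1(y_true, y_pred, labels=None):
    # Sketch only: pool the raw counts across labels, then compute one
    # precision/recall pair and a single F1 from the pooled counts.
    pr_list = get_precision_recall(y_true, y_pred, labels)
    tp = sum(item['tp'] for item in pr_list)
    fp = sum(item['fp'] for item in pr_list)
    fn = sum(item['fn'] for item in pr_list)
    precision = get_precision(tp, fp)
    recall = get_recall(tp, fn)
    return calculate_f1_score(precision, recall)

For the example lists in testing/eval_testing.py this pools tp=5, fp=3, fn=3, giving precision = recall = F1 = 0.625; with exactly one label per item, micro F1 reduces to plain accuracy (5 of 8 correct).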