parent
821ca89dd7
commit
1e470f829e
@@ -1,15 +1,121 @@
+import utils.constants as const
+
+
 def f1_score(y_true, y_pred, labels, average):
-    return 0
+    if average is None or average == const.AVG_MACRO:
+        pr_list = get_precision_recall(y_true, y_pred, labels)
+        f1_score_list = []
+        f1_sum = 0
+        for item in pr_list:
+            precision = item['precision']
+            recall = item['recall']
+            f_score = calculate_f1_score(precision, recall)
+            f1_sum += f_score
+            if average is None:
+                f1_score_list.append(Result(precision, recall, average, item['label'], f_score))
+
+        if average is None:
+            return f1_score_list
+        elif average == const.AVG_MACRO:
+            return [Result(None, None, average, None, f1_sum / len(pr_list))]
+
+    elif average == const.AVG_MICRO:
+        pass
+
+    return None
+
+
+def get_precision_recall(y_true, y_pred, labels=None):
+    """
+    This method takes Gold Standard Labels and Predicted Labels as arguments
+    and computes Precision and Recall for all the labels (including TP, FP, FN).
+
+    Returns a list of dictionaries with precision, recall, tp, fp, fn.
+
+    :param y_true: list of Gold labels
+    :param y_pred: list of predicted labels
+    :param labels: Optional, list of labels for which Precision and Recall are computed
+    :return: returns the list of dictionaries with Precision and Recall values
+    [
+        {'label': 'method', 'precision': 0.71, 'recall': 0.71, 'tp': 5, 'fp': 2, 'fn': 2},
+        {'label': 'background', 'precision': 0.56, 'recall': 0.49, 'tp': 3, 'fp': 2, 'fn': 2}
+    ]
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError('Length of Gold standard labels and Predicted labels must be the same')
+
+    all_labels = False
+    if labels is None or len(labels) == 0:
+        # get the precision and recall for all the labels
+        all_labels = True
+
+    pr_dict = {}
+
+    gold_iter = iter(y_true)
+    pred_iter = iter(y_pred)
+
+    while True:
+        gold_label = next(gold_iter, None)
+        pred_label = next(pred_iter, None)
+
+        # check if the iterator is empty or finished iterating
+        if gold_label is None or pred_label is None:
+            break
+
+        # Add label entry to the dictionary, if not available
+        if gold_label not in pr_dict:
+            pr_dict[gold_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        # Add label entry to the dictionary, if not available
+        if pred_label not in pr_dict:
+            pr_dict[pred_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        if gold_label == pred_label:
+            # predicted correctly
+            pr_dict[gold_label]['tp'] += 1
+        else:
+            # Gold is in class but the prediction is not: false negative for the Gold label
+            pr_dict[gold_label]['fn'] += 1
+            # Predicted in class, but Gold is not in class: false positive for the predicted label
+            pr_dict[pred_label]['fp'] += 1
+    # end while
+
+    pr_list = []
+
+    if all_labels:
+        labels = list(pr_dict.keys())
+
+    for label in labels:
+        tp = pr_dict[label]['tp']
+        fp = pr_dict[label]['fp']
+        fn = pr_dict[label]['fn']
+        precision = get_precision(tp, fp)
+        recall = get_recall(tp, fn)
+        pr_list.append({'label': label, 'precision': precision, 'recall': recall, 'tp': tp, 'fp': fp, 'fn': fn})
+
+    return pr_list
+
+
+def get_precision(tp, fp):
+    return tp / (tp + fp)
+
+
+def get_recall(tp, fn):
+    return tp / (tp + fn)
+
+
+def calculate_f1_score(precision, recall):
+    return 2 * (precision * recall) / (precision + recall)
+
+
 class Result:
 
-    def __init__(self, precision, recall, average, label):
+    def __init__(self, precision, recall, average, label, f_score):
         self.precision = precision
         self.recall = recall
         self.average = average
         self.label = label
-        self.f1_score = 2 * (precision * recall) / (precision + recall)
+        self.f1_score = f_score
 
     def print_result(self):
-        print('F1 Score :: ',self.f1_score)
+        print('F1 Score :: ', self.f1_score, ' Label :: ', self.label)
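Note on the AVG_MICRO branch above: it is left as a stub (pass), so f1_score currently returns None for micro averaging. A possible follow-up, sketched here only as an assumption and not as part of this commit, would live next to get_precision_recall in eval/metrics.py, pool the tp/fp/fn counts that it already returns, and compute a single F1 from the pooled totals. The name micro_f1 is hypothetical.

def micro_f1(y_true, y_pred, labels=None):
    # Hypothetical helper (not in this commit): micro averaging pools the
    # per-label counts before computing one precision/recall/F1.
    pr_list = get_precision_recall(y_true, y_pred, labels)
    tp = sum(item['tp'] for item in pr_list)
    fp = sum(item['fp'] for item in pr_list)
    fn = sum(item['fn'] for item in pr_list)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return calculate_f1_score(precision, recall)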
@@ -1,2 +1,10 @@
 from eval.metrics import f1_score
+import utils.constants as const
+
+y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
+y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']
+
+result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MACRO)
+
+for result in result_list:
+    result.print_result()
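As a sanity check on the sample data above (an illustration, not part of the commit): the pairs give 'positive' tp=2, fp=1, fn=2, so precision = 2/3, recall = 1/2 and F1 ≈ 0.571, and 'negative' tp=3, fp=2, fn=1, so precision = 3/5, recall = 3/4 and F1 ≈ 0.667. The MACRO result the script prints is their unweighted mean, roughly 0.619. A standalone recomputation that uses only the standard formulas, not the eval.metrics module:

y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']

f1_scores = []
for label in ['positive', 'negative']:
    # count true positives, false positives and false negatives for this label
    tp = sum(1 for g, p in zip(y_true, y_pred) if g == label and p == label)
    fp = sum(1 for g, p in zip(y_true, y_pred) if g != label and p == label)
    fn = sum(1 for g, p in zip(y_true, y_pred) if g == label and p != label)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1_scores.append(2 * precision * recall / (precision + recall))

# macro F1: unweighted mean of the per-label F1 scores, expected to be about 0.619
print(sum(f1_scores) / len(f1_scores))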
@@ -0,0 +1,2 @@
+AVG_MICRO = 'MICRO'
+AVG_MACRO = 'MACRO'