parent
821ca89dd7
commit
1e470f829e
@@ -1,15 +1,121 @@
+import utils.constants as const
+
+
 def f1_score(y_true, y_pred, labels, average):
-    return 0
+    if average is None or average == const.AVG_MACRO:
+        pr_list = get_precision_recall(y_true, y_pred, labels)
+        f1_score_list = []
+        f1_sum = 0
+        for item in pr_list:
+            precision = item['precision']
+            recall = item['recall']
+            f_score = calculate_f1_score(precision, recall)
+            f1_sum += f_score
+            if average is None:
+                f1_score_list.append(Result(precision, recall, average, item['label'], f_score))
+
+        if average is None:
+            return f1_score_list
+        elif average == const.AVG_MACRO:
+            return [Result(None, None, average, None, f1_sum / len(pr_list))]
+
+    elif average == const.AVG_MICRO:
+        pass
+
+    return None
+
+
+def get_precision_recall(y_true, y_pred, labels=None):
+    """
+    This method takes Gold Standard Labels and Predicted Labels as arguments
+    and computes Precision and Recall for all the labels (including TP, FP, FN).
+
+    Returns a list of dictionaries with precision, recall, tp, fp, fn.
+
+    :param y_true: list of Gold labels
+    :param y_pred: list of predicted labels
+    :param labels: Optional, list of labels for which Precision and Recall are computed
+    :return: returns the list of dictionaries with Precision and Recall values
+    [
+        {'label': 'method', 'precision': 0.71, 'recall': 0.71, 'tp': 5, 'fp': 2, 'fn': 2},
+        {'label': 'background', 'precision': 0.56, 'recall': 0.49, 'tp': 3, 'fp': 2, 'fn': 2}
+    ]
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError('Length of Gold standard labels and Predicted labels must be the same')
+
+    all_labels = False
+    if labels is None or len(labels) == 0:
+        # get the precision and recall for all the labels
+        all_labels = True
+
+    pr_dict = {}
+
+    gold_iter = iter(y_true)
+    pred_iter = iter(y_pred)
+
+    while True:
+        gold_label = next(gold_iter, None)
+        pred_label = next(pred_iter, None)
+
+        # check if the iterator is empty or finished iterating
+        if gold_label is None or pred_label is None:
+            break
+
+        # Add label entry to the dictionary, if not available
+        if gold_label not in pr_dict:
+            pr_dict[gold_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        # Add label entry to the dictionary, if not available
+        if pred_label not in pr_dict:
+            pr_dict[pred_label] = {'tp': 0, 'fp': 0, 'fn': 0}
+
+        if gold_label == pred_label:
+            # predicted correctly
+            pr_dict[gold_label]['tp'] += 1
+        else:
+            # Gold is in class but the prediction is not: false negative for the Gold label
+            pr_dict[gold_label]['fn'] += 1
+            # Predicted in class, but Gold is not in class: false positive for the predicted label
+            pr_dict[pred_label]['fp'] += 1
+    # end while
+
+    pr_list = []
+
+    if all_labels:
+        labels = list(pr_dict.keys())
+
+    for label in labels:
+        tp = pr_dict[label]['tp']
+        fp = pr_dict[label]['fp']
+        fn = pr_dict[label]['fn']
+        precision = get_precision(tp, fp)
+        recall = get_recall(tp, fn)
+        pr_list.append({'label': label, 'precision': precision, 'recall': recall, 'tp': tp, 'fp': fp, 'fn': fn})
+
+    return pr_list
+
+
+def get_precision(tp, fp):
+    return tp / (tp + fp)
+
+
+def get_recall(tp, fn):
+    return tp / (tp + fn)
+
+
+def calculate_f1_score(precision, recall):
+    return 2 * (precision * recall) / (precision + recall)
+
+
 class Result:
 
-    def __init__(self, precision, recall, average, label):
+    def __init__(self, precision, recall, average, label, f_score):
         self.precision = precision
         self.recall = recall
         self.average = average
         self.label = label
-        self.f1_score = 2 * (precision * recall) / (precision + recall)
+        self.f1_score = f_score
 
     def print_result(self):
-        print('F1 Score :: ',self.f1_score)
+        print('F1 Score :: ', self.f1_score, ' Label :: ', self.label)
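Note on the AVG_MICRO branch above: it is left as a stub (pass), so f1_score currently returns None for micro averaging. A possible follow-up, sketched here only as an assumption and not as part of this commit, would live next to get_precision_recall in eval/metrics.py, pool the tp/fp/fn counts that it already returns, and compute a single F1 from the pooled totals. The name micro_f1 is hypothetical.

def micro_f1(y_true, y_pred, labels=None):
    # Hypothetical helper (not in this commit): micro averaging pools the
    # per-label counts before computing one precision/recall/F1.
    pr_list = get_precision_recall(y_true, y_pred, labels)
    tp = sum(item['tp'] for item in pr_list)
    fp = sum(item['fp'] for item in pr_list)
    fn = sum(item['fn'] for item in pr_list)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return calculate_f1_score(precision, recall)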
@@ -1,2 +1,10 @@
 from eval.metrics import f1_score
+import utils.constants as const
+
+y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
+y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']
+
+result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MACRO)
+
+for result in result_list:
+    result.print_result()
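As a sanity check on the sample data above (an illustration, not part of the commit): the pairs give 'positive' tp=2, fp=1, fn=2, so precision = 2/3, recall = 1/2 and F1 ≈ 0.571, and 'negative' tp=3, fp=2, fn=1, so precision = 3/5, recall = 3/4 and F1 ≈ 0.667. The MACRO result the script prints is their unweighted mean, roughly 0.619. A standalone recomputation that uses only the standard formulas, not the eval.metrics module:

y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']

f1_scores = []
for label in ['positive', 'negative']:
    # count true positives, false positives and false negatives for this label
    tp = sum(1 for g, p in zip(y_true, y_pred) if g == label and p == label)
    fp = sum(1 for g, p in zip(y_true, y_pred) if g != label and p == label)
    fn = sum(1 for g, p in zip(y_true, y_pred) if g == label and p != label)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1_scores.append(2 * precision * recall / (precision + recall))

# macro F1: unweighted mean of the per-label F1 scores, expected to be about 0.619
print(sum(f1_scores) / len(f1_scores))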
@@ -0,0 +1,2 @@
+AVG_MICRO = 'MICRO'
+AVG_MACRO = 'MACRO'