@@ -17,9 +17,10 @@ def f1_score(y_true, y_pred, labels, average):
     :return: returns a list of Result class objects. <eval.metrics.Result>
     Use :func:`~eval.metrics.Result.print_result` to print F1 Score on the Console
     """
+    assert len(list(y_true))==len(list(y_pred))
     # pr_list - list of dictionaries with precision, recall, TPs, FPs and FNs for each label
+    pr_list = get_precision_recall(y_true, y_pred, labels)
     if average is None or average == const.AVG_MACRO:
-        pr_list = get_precision_recall(y_true, y_pred, labels)
         f1_score_list = []
         f1_sum = 0
         for item in pr_list:
@@ -28,15 +29,29 @@
             f_score = calculate_f1_score(precision, recall)
             f1_sum += f_score
             if average is None:
-                f1_score_list.append(Result(precision, recall, average, item['label'], f_score))
+                f1_score_list.append(Result(precision, recall, average, item['label'], round(f_score, 4)))
 
         if average is None:
             return f1_score_list
         elif average == const.AVG_MACRO:
-            return [Result(None, None, average, None, f1_sum / len(pr_list))]
+            return [Result(None, None, average, None, round(f1_sum / len(pr_list), 4))]
 
     elif average == const.AVG_MICRO:
-        return sum([a==b for a,b in zip(y_true, y_pred)])
+        aggregate_tp = 0
+        aggregate_fp = 0
+        aggregate_fn = 0
+
+        for item in pr_list:
+            aggregate_tp += item['tp']
+            aggregate_fp += item['fp']
+            aggregate_fn += item['fn']
+
+        # find precision and recall for aggregate TP, FP & FN
+        agg_precision = get_precision(aggregate_tp, aggregate_fp)
+        agg_recall = get_recall(aggregate_tp, aggregate_fn)
+
+        agg_f1_score = calculate_f1_score(agg_precision, agg_recall)
+        return [Result(agg_precision, agg_recall, average, None, round(agg_f1_score, 4))]
 
     return None
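
Editor's note on the micro-averaging change: the old branch returned sum([a==b for a, b in zip(y_true, y_pred)]), which is a raw count of exact matches rather than an F1 value. The new branch pools true positives, false positives and false negatives over all labels and derives a single precision/recall pair from the pooled counts. A minimal standalone sketch of that pooled calculation, assuming the same 'tp'/'fp'/'fn' dict layout that get_precision_recall produces (the function name micro_f1 is illustrative, not part of the module):

    def micro_f1(pr_list):
        """Micro-averaged F1: pool TP/FP/FN across labels, then compute one precision/recall pair."""
        tp = sum(item['tp'] for item in pr_list)
        fp = sum(item['fp'] for item in pr_list)
        fn = sum(item['fn'] for item in pr_list)
        precision = tp / (tp + fp) if (tp + fp) else 0.0
        recall = tp / (tp + fn) if (tp + fn) else 0.0
        return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

    # e.g. label A with tp=2, fp=1, fn=0 and label B with tp=1, fp=0, fn=2 pool to
    # tp=3, fp=1, fn=2 -> precision=0.75, recall=0.6 -> micro F1 = 0.9 / 1.35 = 0.6667
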
@@ -61,7 +76,7 @@ def get_precision_recall(y_true, y_pred, labels=None):
         raise ValueError('Length of Gold standard labels and Predicted labels must be the same')
 
     all_labels = False
-    if labels is None or len(labels) is 0:
+    if labels is None or len(labels) == 0:
         # get the precision and recall for all the labels
         all_labels = True
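
Editor's note on the comparison fix: len(labels) is 0 tests object identity, not equality. It only happened to evaluate to True because CPython caches small integers, and Python 3.8+ flags identity comparisons against literals with a SyntaxWarning, hence the switch to ==. A short illustration (standalone, not from the module):

    labels = []
    print(len(labels) == 0)   # True - value equality, the intended check
    print(not labels)         # True - the idiomatic emptiness test
    # "len(labels) is 0" relies on an interpreter detail and warns on Python 3.8+
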
@@ -162,4 +177,4 @@ class Result:
 
     def print_result(self):
         """ Prints F1 Score"""
-        print('F1 Score :: ', self.f1_score, ' Label :: ', self.label)
+        print('F1 Score :: ', self.f1_score, ' Label :: ', self.label, ' Average :: ', self.average)
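
Editor's note: with these changes, f1_score returns one rounded Result per label for average=None, a single rounded macro-averaged Result, and a genuine micro-averaged Result, and print_result now reports the averaging mode alongside the score. A hedged usage sketch (the import paths, the const.AVG_* names and the label values are assumed for illustration and are not confirmed by this diff):

    from eval import const
    from eval.metrics import f1_score

    y_true = ['greet', 'bye', 'greet', 'bye']
    y_pred = ['greet', 'bye', 'bye', 'bye']

    # per-label results: one Result object per label, F1 rounded to 4 decimal places
    for result in f1_score(y_true, y_pred, labels=None, average=None):
        result.print_result()

    # single aggregated results
    f1_score(y_true, y_pred, labels=None, average=const.AVG_MACRO)[0].print_result()
    f1_score(y_true, y_pred, labels=None, average=const.AVG_MICRO)[0].print_result()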