From 6410dda0cbe95c77032f99c831ded8ac32f2fa1a Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Mon, 4 May 2020 00:56:23 +0200 Subject: [PATCH 1/4] Micro Averaging done --- eval/metrics.py | 29 +++++++++++++++++++++-------- testing/eval_testing.py | 11 ++++++++--- utils/csv.py | 0 3 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 utils/csv.py diff --git a/eval/metrics.py b/eval/metrics.py index feacd0a..5ca2331 100644 --- a/eval/metrics.py +++ b/eval/metrics.py @@ -18,8 +18,9 @@ def f1_score(y_true, y_pred, labels, average): Use :func:`~eval.metrics.Result.print_result` to print F1 Score on the Console """ + # pr_list - list of dictionaries with precision, recall, TPs, FPs and FNs for each label + pr_list = get_precision_recall(y_true, y_pred, labels) if average is None or average == const.AVG_MACRO: - pr_list = get_precision_recall(y_true, y_pred, labels) f1_score_list = [] f1_sum = 0 for item in pr_list: @@ -28,17 +29,29 @@ def f1_score(y_true, y_pred, labels, average): f_score = calculate_f1_score(precision, recall) f1_sum += f_score if average is None: - f1_score_list.append(Result(precision, recall, average, item['label'], f_score)) + f1_score_list.append(Result(precision, recall, average, item['label'], round(f_score, 3))) if average is None: return f1_score_list elif average == const.AVG_MACRO: - return [Result(None, None, average, None, f1_sum / len(pr_list))] + return [Result(None, None, average, None, round(f1_sum / len(pr_list), 3))] elif average == const.AVG_MICRO: - print('test test test') - print("another test comment") - pass + aggregate_tp = 0 + aggregate_fp = 0 + aggregate_fn = 0 + + for item in pr_list: + aggregate_tp += item['tp'] + aggregate_fp += item['fp'] + aggregate_fn += item['fn'] + + # find precision and recall for aggregate TP, FP & FN + agg_precision = get_precision(aggregate_tp, aggregate_fp) + agg_recall = get_recall(aggregate_tp, aggregate_fn) + + agg_f1_score = calculate_f1_score(agg_precision, agg_recall) + return [Result(agg_precision, agg_recall, average, None, round(agg_f1_score, 3))] return None @@ -63,7 +76,7 @@ def get_precision_recall(y_true, y_pred, labels=None): raise ValueError('Length of Gold standard labels and Predicted labels must be the same') all_labels = False - if labels is None or len(labels) is 0: + if labels is None or len(labels) == 0: # get the precision and recall for all the labels all_labels = True @@ -164,4 +177,4 @@ class Result: def print_result(self): """ Prints F1 Score""" - print('F1 Score :: ', self.f1_score, ' Label :: ', self.label) + print('F1 Score :: ', self.f1_score, ' Label :: ', self.label, ' Average :: ', self.average) diff --git a/testing/eval_testing.py b/testing/eval_testing.py index 89782fa..e6fa801 100644 --- a/testing/eval_testing.py +++ b/testing/eval_testing.py @@ -1,10 +1,15 @@ from eval.metrics import f1_score import utils.constants as const -y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative'] -y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative'] +y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative'] +y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative'] result_list = f1_score(y_true, 
y_pred, ['positive', 'negative'], const.AVG_MICRO) for result in result_list: - result.print_result() \ No newline at end of file + result.print_result() + +result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MACRO) + +for result in result_list: + result.print_result() diff --git a/utils/csv.py b/utils/csv.py new file mode 100644 index 0000000..e69de29 From 3fe33ab51a64c303fffa40b7c874a699b9710ebb Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Mon, 4 May 2020 09:04:55 +0200 Subject: [PATCH 2/4] Comparision with sklearn metrics done - testing --- eval/metrics.py | 6 +++--- testing/eval_testing.py | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/eval/metrics.py b/eval/metrics.py index 5ca2331..9719648 100644 --- a/eval/metrics.py +++ b/eval/metrics.py @@ -29,12 +29,12 @@ def f1_score(y_true, y_pred, labels, average): f_score = calculate_f1_score(precision, recall) f1_sum += f_score if average is None: - f1_score_list.append(Result(precision, recall, average, item['label'], round(f_score, 3))) + f1_score_list.append(Result(precision, recall, average, item['label'], round(f_score, 4))) if average is None: return f1_score_list elif average == const.AVG_MACRO: - return [Result(None, None, average, None, round(f1_sum / len(pr_list), 3))] + return [Result(None, None, average, None, round(f1_sum / len(pr_list), 4))] elif average == const.AVG_MICRO: aggregate_tp = 0 @@ -51,7 +51,7 @@ def f1_score(y_true, y_pred, labels, average): agg_recall = get_recall(aggregate_tp, aggregate_fn) agg_f1_score = calculate_f1_score(agg_precision, agg_recall) - return [Result(agg_precision, agg_recall, average, None, round(agg_f1_score, 3))] + return [Result(agg_precision, agg_recall, average, None, round(agg_f1_score, 4))] return None diff --git a/testing/eval_testing.py b/testing/eval_testing.py index e6fa801..acf2c7d 100644 --- a/testing/eval_testing.py +++ b/testing/eval_testing.py @@ -1,5 +1,6 @@ from eval.metrics import f1_score import utils.constants as const +from sklearn.metrics import f1_score as f1 y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative'] y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative'] @@ -9,7 +10,11 @@ result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MICRO for result in result_list: result.print_result() +print('SK Learn F1 Score (MICRO):: ', f1(y_true, y_pred, ['positive', 'negative'], average='micro')) + result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MACRO) for result in result_list: result.print_result() + +print('SK Learn F1 Score (MACRO):: ', f1(y_true, y_pred, ['positive', 'negative'], average='macro')) From 0577f982a26e2f4a1516720c22d201e6fa8d1776 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Sun, 10 May 2020 18:05:25 +0200 Subject: [PATCH 3/4] Reading Train file done, fixed os path issues --- testing/eval_testing.py | 12 ++++++++++++ utils/csv.py | 19 +++++++++++++++++++ utils/models.py | 13 +++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 utils/models.py diff --git a/testing/eval_testing.py b/testing/eval_testing.py index acf2c7d..b4353ef 100644 --- a/testing/eval_testing.py +++ b/testing/eval_testing.py @@ -1,6 +1,8 @@ from eval.metrics import f1_score import 
utils.constants as const from sklearn.metrics import f1_score as f1 +import os +from utils.csv import read_csv_file y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative'] y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative'] @@ -18,3 +20,13 @@ for result in result_list: result.print_result() print('SK Learn F1 Score (MACRO):: ', f1(y_true, y_pred, ['positive', 'negative'], average='macro')) + + +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +train_file_path = project_root+'/data/tsv/train.tsv' +print(train_file_path) + +data = read_csv_file(csv_file_path=train_file_path, delimiter='\t') +for inst in data[:5]: + inst.print() diff --git a/utils/csv.py b/utils/csv.py index e69de29..51b32c2 100644 --- a/utils/csv.py +++ b/utils/csv.py @@ -0,0 +1,19 @@ +import csv +from utils.models import DataInstance + + +def read_csv_file(csv_file_path, delimiter='\t'): + """ + This function takes file path as an argument, reads the data file and + returns a list of DataInstance objects with text and true labels + + :param delimiter: Delimiter for the file. Default is Tab(\t) + :param csv_file_path: path to the TSV/CSV file + :return: returns a list of DataInstance class objects. + """ + with open(csv_file_path, 'r') as file: + file_data = csv.reader(file, delimiter=delimiter) + data = [] + for row in file_data: + data.append(DataInstance(row[0], row[2], row[3])) + return data diff --git a/utils/models.py b/utils/models.py new file mode 100644 index 0000000..f2a6753 --- /dev/null +++ b/utils/models.py @@ -0,0 +1,13 @@ + +class DataInstance: + """ + Model Class for carrying Training and Testing data from tsc/csv file + """ + + def __init__(self, r_id, text, true_label): + self.did = r_id + self.text = text + self.true_label = true_label + + def print(self): + print('True Label :: ', self.true_label, ' Text :: ', self.text) From 3455c34601c22679201154bd7169be224a8ba109 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Sun, 10 May 2020 19:14:24 +0200 Subject: [PATCH 4/4] Added Structure for Perceptron and Multi-Class Perceptron --- classifier/__init__.py | 0 classifier/linear_model.py | 17 +++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 classifier/__init__.py create mode 100644 classifier/linear_model.py diff --git a/classifier/__init__.py b/classifier/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/classifier/linear_model.py b/classifier/linear_model.py new file mode 100644 index 0000000..99fd07c --- /dev/null +++ b/classifier/linear_model.py @@ -0,0 +1,17 @@ +class Perceptron: + + def __init__(self, label): + self.classifier_label = label + pass + + def fit(self, X, y, weights=None): + pass + + def predict(self, X): + pass + + +class MultiClassPerceptron: + + def __init__(self): + pass
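
A quick hand check of the averaging added in [PATCH 1/4] and compared against scikit-learn in [PATCH 2/4], using the 16-item y_true/y_pred lists from testing/eval_testing.py. The snippet below is a standalone recomputation that imports nothing from this repository; the expected values in the comments come from tallying TPs, FPs and FNs by hand for those two lists, not from a recorded program run:

# standalone check of micro vs. macro F1 for the eval_testing.py test vectors
from collections import Counter

y_true = ['positive', 'positive', 'negative', 'negative'] * 4
y_pred = ['positive', 'negative', 'negative', 'positive',
          'positive', 'negative', 'negative', 'positive',
          'positive', 'negative', 'negative', 'positive',
          'positive', 'negative', 'negative', 'negative']

counts = {label: Counter() for label in ('positive', 'negative')}
for t, p in zip(y_true, y_pred):
    if t == p:
        counts[t]['tp'] += 1
    else:
        counts[p]['fp'] += 1
        counts[t]['fn'] += 1

# per-label F1, then the unweighted mean (macro)
f1_per_label = []
for label, c in counts.items():
    precision = c['tp'] / (c['tp'] + c['fp'])
    recall = c['tp'] / (c['tp'] + c['fn'])
    f1_per_label.append(2 * precision * recall / (precision + recall))
macro_f1 = sum(f1_per_label) / len(f1_per_label)

# pooled counts over both labels, then a single F1 (micro)
tp = sum(c['tp'] for c in counts.values())   # 9  (positive 4, negative 5)
fp = sum(c['fp'] for c in counts.values())   # 7  (positive 3, negative 4)
fn = sum(c['fn'] for c in counts.values())   # 7  (positive 4, negative 3)
micro_p = tp / (tp + fp)
micro_r = tp / (tp + fn)
micro_f1 = 2 * micro_p * micro_r / (micro_p + micro_r)

print('micro F1 :: ', round(micro_f1, 4))   # 0.5625, i.e. plain accuracy (9 of 16 correct)
print('macro F1 :: ', round(macro_f1, 4))   # 0.5608, the mean of 8/15 and 10/17

The pooled totals (TP=9, FP=7, FN=7) are what the aggregate_tp, aggregate_fp and aggregate_fn counters in the new AVG_MICRO branch should end up with for this data. For single-label data scored over all labels, every mistake counts once as a false positive and once as a false negative, so pooled precision and recall both equal accuracy and micro F1 collapses to accuracy; the macro average instead weights both labels equally however often they occur.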
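
One small portability note on the scikit-learn comparison in [PATCH 2/4]: f1 is called with the label list as a third positional argument. That works on the scikit-learn releases current in 2020, but newer releases make these parameters keyword-only, so the calls may need the keyword spelling below (same f1, y_true and y_pred names as in testing/eval_testing.py):

print('SK Learn F1 Score (MICRO):: ', f1(y_true, y_pred, labels=['positive', 'negative'], average='micro'))
print('SK Learn F1 Score (MACRO):: ', f1(y_true, y_pred, labels=['positive', 'negative'], average='macro'))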
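
A note on the reader added in [PATCH 3/4]: read_csv_file builds each DataInstance from row[0], row[2] and row[3], so it assumes at least four tab-separated columns per line, with an id in column 0, the text in column 2 and the gold label in column 3. It also does not skip a header row, so a header line in train.tsv would come back as a DataInstance too. The layout of data/tsv/train.tsv is not shown in these patches, so the tiny file below is invented purely to illustrate that column assumption (the second column is ignored by the reader):

# hypothetical two-row TSV laid out the way read_csv_file indexes rows
import csv
import tempfile

from utils.csv import read_csv_file  # assumes the project root is on sys.path

rows = [
    ['1', 'ignored', 'the movie was great', 'positive'],
    ['2', 'ignored', 'the movie was terrible', 'negative'],
]
with tempfile.NamedTemporaryFile('w', suffix='.tsv', delete=False, newline='') as tmp:
    csv.writer(tmp, delimiter='\t').writerows(rows)

for inst in read_csv_file(csv_file_path=tmp.name, delimiter='\t'):
    inst.print()   # prints the true label and text of each row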
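
[PATCH 4/4] only adds the class structure; fit and predict are left as pass. Purely to illustrate the usual algorithm behind those method names, here is one way a binary perceptron with that interface could look. It assumes dense NumPy feature rows and labels encoded as +1/-1; those representation choices are mine, and nothing in the patch commits to them:

import numpy as np


class Perceptron:
    """Sketch only: binary perceptron over dense feature rows, labels in {+1, -1}."""

    def __init__(self, label):
        self.classifier_label = label
        self.weights = None
        self.bias = 0.0

    def fit(self, X, y, weights=None):
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        # start from the supplied weights if given, otherwise from zeros
        self.weights = np.zeros(X.shape[1]) if weights is None else np.asarray(weights, dtype=float)
        for _ in range(10):   # fixed number of passes, just for the sketch
            for xi, yi in zip(X, y):
                # classic perceptron rule: update only on a misclassified example
                if yi * (xi @ self.weights + self.bias) <= 0:
                    self.weights = self.weights + yi * xi
                    self.bias += yi
        return self

    def predict(self, X):
        scores = np.asarray(X, dtype=float) @ self.weights + self.bias
        return np.where(scores >= 0, 1, -1)


clf = Perceptron('positive').fit([[1, 0], [0, 1]], [1, -1])
print(clf.predict([[1, 0], [0, 1]]))   # -> [ 1 -1]

MultiClassPerceptron could then hold one such Perceptron per label (one-vs-rest) and predict the label whose classifier produces the largest score, but that is only one of several reasonable designs for the still-empty class.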