From 0577f982a26e2f4a1516720c22d201e6fa8d1776 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Sun, 10 May 2020 18:05:25 +0200 Subject: [PATCH] Reading Train file done, fixed os path issues --- testing/eval_testing.py | 12 ++++++++++++ utils/csv.py | 19 +++++++++++++++++++ utils/models.py | 13 +++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 utils/models.py diff --git a/testing/eval_testing.py b/testing/eval_testing.py index acf2c7d..b4353ef 100644 --- a/testing/eval_testing.py +++ b/testing/eval_testing.py @@ -1,6 +1,8 @@ from eval.metrics import f1_score import utils.constants as const from sklearn.metrics import f1_score as f1 +import os +from utils.csv import read_csv_file y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative'] y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative'] @@ -18,3 +20,13 @@ for result in result_list: result.print_result() print('SK Learn F1 Score (MACRO):: ', f1(y_true, y_pred, ['positive', 'negative'], average='macro')) + + +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +train_file_path = project_root+'/data/tsv/train.tsv' +print(train_file_path) + +data = read_csv_file(csv_file_path=train_file_path, delimiter='\t') +for inst in data[:5]: + inst.print() diff --git a/utils/csv.py b/utils/csv.py index e69de29..51b32c2 100644 --- a/utils/csv.py +++ b/utils/csv.py @@ -0,0 +1,19 @@ +import csv +from utils.models import DataInstance + + +def read_csv_file(csv_file_path, delimiter='\t'): + """ + This function takes file path as an argument, reads the data file and + returns a list of DataInstance objects with text and true labels + + :param delimiter: Delimiter for the file. 
Default is Tab(\t) + :param csv_file_path: path to the TSV/CSV file + :return: returns a list of DataInstance class objects. + """ + with open(csv_file_path, 'r') as file: + file_data = csv.reader(file, delimiter=delimiter) + data = [] + for row in file_data: + data.append(DataInstance(row[0], row[2], row[3])) + return data diff --git a/utils/models.py b/utils/models.py new file mode 100644 index 0000000..f2a6753 --- /dev/null +++ b/utils/models.py @@ -0,0 +1,13 @@ + +class DataInstance: + """ + Model Class for carrying Training and Testing data from tsv/csv file + """ + + def __init__(self, r_id, text, true_label): + self.did = r_id + self.text = text + self.true_label = true_label + + def print(self): + print('True Label :: ', self.true_label, ' Text :: ', self.text)