You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

38 lines
1.2 KiB

import csv
from feature_extraction.features import extract_features_from_text
def read_csv_file(csv_file_path, delimiter='\t'):
    """
    Read a delimited data file and build one DataInstance per record.

    Column 0 of each row is passed as the record id, column 2 as the text and
    column 3 as the true label; column 1 is not used. Blank lines in the file
    are skipped instead of aborting the whole load.

    :param csv_file_path: path to the TSV/CSV file
    :param delimiter: field delimiter for the file. Defaults to a tab character.
    :return: list of DataInstance objects, in file order
    :raises IndexError: if a non-blank row has fewer than four columns
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires, so quoted fields containing newlines and
    # '\r\n'-terminated files are parsed correctly.
    with open(csv_file_path, 'r', newline='') as file:
        return [
            DataInstance(row[0], row[2], row[3])
            for row in csv.reader(file, delimiter=delimiter)
            if row  # csv.reader yields an empty list for a blank line
        ]
class DataInstance:
    """
    Container for a single record read from a tsv/csv data file.

    Holds the record id, the raw text, the true label, a predicted-label slot
    that starts out as None, and the features extracted from the text.
    """

    def __init__(self, r_id, text, true_label):
        """
        Store the record fields and extract features from the text.

        :param r_id: identifier of the record, stored as ``did``
        :param text: text of the record
        :param true_label: label supplied with the record
        """
        # Features are computed once, at construction time.
        extracted = extract_features_from_text(text)
        self.did, self.text, self.true_label = r_id, text, true_label
        self.predicted_label = None
        self.features = extracted

    def print(self):
        """Write the true label, the text and the features to stdout."""
        label_line = ('\nTrue Label :: ', self.true_label, ' Text :: ', self.text)
        feature_line = ('Features :: ', self.features)
        for parts in (label_line, feature_line):
            # Inside a method body `print` still resolves to the builtin.
            print(*parts)