From 038e612e4f585ffd8b2baa0ec629935eff4907a0 Mon Sep 17 00:00:00 2001 From: yelircaasi Date: Thu, 30 Jul 2020 14:36:38 +0200 Subject: [PATCH] minor changes to output file --- testing/save_test_results.py | 66 ++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 testing/save_test_results.py diff --git a/testing/save_test_results.py b/testing/save_test_results.py new file mode 100644 index 0000000..6097088 --- /dev/null +++ b/testing/save_test_results.py @@ -0,0 +1,66 @@ +#import os +#os.chdir('/Users/iriley/code/citation-analysis') +import sys +sys.path.append('/Users/iriley/code/citation-analysis') +from classifier.linear_model import MultiClassPerceptron +from sklearn.metrics import confusion_matrix as cm +from utils.csv import read_csv_file +from eval.metrics import f1_score +import utils.constants as const +import pandas as pd +import numpy as np + + + +train_file_path = '/Users/iriley/code/citation-analysis/data/tsv/train.tsv' +dev_file_path = '/Users/iriley/code/citation-analysis/data/tsv/test.tsv' + + +# Read the training dataset +X_train_inst = read_csv_file(train_file_path, '\t') + +# set of labels from Training data +labels = set([inst.true_label for inst in X_train_inst]) + +# Read test data set +X_dev_inst = read_csv_file(dev_file_path, '\t') + +# number of training iterations +epochs = 50 + +# create MultiClassPerceptron classifier object +clf = MultiClassPerceptron(epochs=epochs, learning_rate=0.5, random_state=101) + +# train the model +clf.fit(X_train=X_train_inst, labels=list(labels)) + +# predict +y_pred = clf.predict(X_dev_inst) +y_scores = np.array(clf.get_class_scores(X_dev_inst)) + +y_true = [inst.true_label for inst in X_dev_inst] + +labeldict = {'background': 0, 'method': 1, 'result': 2} +y_pred = np.array([labeldict[x] for x in y_pred]) +y_true = np.array([labeldict[x] for x in y_true]) + +conmat = cm(y_true, y_pred) + +df = pd.DataFrame() +df['pred'] = y_pred +df['true'] = y_true +df['correct'] = y_pred==y_true +df['score0'] = np.round(y_scores[:,0],3) +df['score1'] = np.round(y_scores[:,1],3) +df['score2'] = np.round(y_scores[:,2],3) + +df.to_csv('/Users/iriley/code/machine_learning/lab2020/y_pred_model1.csv', index=False) + +## Model Evaluation +#f1_score_micro = f1_score(y_true, y_pred, labels, const.AVG_MICRO) +#f1_score_macro = f1_score(y_true, y_pred, labels, const.AVG_MACRO) +#f1_score_none = f1_score(y_true, y_pred, labels, None) + +## Print F1 Score +#for result in f1_score_micro + f1_score_macro + f1_score_none: +# result.print_result() \ No newline at end of file