improved test file for FFNN

isaac (yelircaasi) committed 5 years ago
parent 69f913c801
commit 6aa59d0712

@@ -36,7 +36,7 @@ class FeedForward(torch.nn.Module):
     def read_data(self):
         """" Reads in training and test data and converts it to proper format. """
-        self.X_train_, self.y_train_, self.X_test = read_csv_nn()
+        self.X_train_, self.y_train_, self.X_test, self.y_test_ = read_csv_nn()
         self.X_test = torch.FloatTensor(self.X_test)
         yclass = np.array([(x[1] == 1) + 2 * (x[2] == 1) for x in self.y_train_])
         is0 = yclass == 0
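Note on the yclass expression in the context above: for strictly one-hot label rows, (x[1] == 1) + 2 * (x[2] == 1) maps [1,0,0], [0,1,0], [0,0,1] to the class indices 0, 1, 2, i.e. it is equivalent to an argmax over the row. A minimal standalone sketch with a toy array (not the project's data):

    import numpy as np

    # toy one-hot labels standing in for self.y_train_
    y_onehot = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
    yclass = np.array([(x[1] == 1) + 2 * (x[2] == 1) for x in y_onehot])
    assert np.array_equal(yclass, np.argmax(y_onehot, axis=1))  # [0, 1, 2, 1]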
@@ -51,14 +51,13 @@ class FeedForward(torch.nn.Module):
         self.l0 = sum(is0)
         self.l1 = sum(is1)
         self.l2 = sum(is2)
+        self.y_test = (self.y_test_[:, 1] == 1) + 2 * (self.y_test_[:, 2] == 1)
-    def fit(self, epochs=100, batch_size=16, lr=0.01, samples0=1000, samples1=1000, samples2=1000):
+    def fit(self, epochs=100, batch_size=16, lr=0.01, samples=(1000, 1000, 1000)):
         """ Trains model, using cross entropy loss and SGD optimizer. """
         self.criterion = torch.nn.CrossEntropyLoss()
         self.optimizer = torch.optim.SGD(self.parameters(), lr)
-        self.samples0 = samples0
-        self.samples1 = samples1
-        self.samples2 = samples2
+        self.samples0, self.samples1, self.samples2 = samples
         self.eval()  # put into eval mode
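The fit refactor above replaces three per-class keyword arguments with a single samples tuple. A hedged usage sketch of the new call, with the same constructor sizes as the test script below; it assumes the project's data files are in place, so it is illustrative rather than directly runnable here:

    from classifier.nn_ff import FeedForward

    clf = FeedForward(28, 9, 3)
    # old signature: clf.fit(samples0=1000, samples1=1000, samples2=1000)
    clf.fit(epochs=100, batch_size=16, lr=0.01, samples=(1000, 1000, 1000))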

Binary file added (image, 33 KiB; not shown).

Binary file added (image, 32 KiB; not shown).

@@ -2,11 +2,33 @@ import sys
 import os
 sys.path.append(os.getcwd())
 from classifier.nn_ff import FeedForward
+from sklearn.metrics import f1_score
+from eval.metrics import plot_confusion_matrix, get_confusion_matrix
+
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-model = FeedForward(28, 9, 3)
-model.fit()
-model.predict()
+clf = FeedForward(28, 9, 3)
+clf.fit()
+clf.predict()
+
+# predict
+y_test = clf.preds
+y_true = clf.y_test
+
+# Model Evaluation
+labels = set(['background', 'method', 'result'])
+f1_score_micro = f1_score(y_true, y_test, average='micro')
+f1_score_macro = f1_score(y_true, y_test, average='macro')
+
+# Print F1 Score
+print('F1 score (micro): ', f1_score_micro)
+print('F1 score (macro): ', f1_score_macro)
+
+# plot confusion matrix
+classdict = {0: 'background', 1: 'method', 2: 'result'}
+y_test = [classdict[x] for x in y_test]
+y_true = [classdict[x] for x in y_true]
+plot_path = project_root + '/plots/confusion_matrix_plot_ff.png'
+plot_confusion_matrix(get_confusion_matrix(y_true, y_test), 'Feed-forward NN Classifier (Baseline)', plot_path)
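For reference, the micro/macro F1 computation used in the script above, shown on toy integer labels standing in for clf.y_test and clf.preds (sklearn only, no project code required):

    from sklearn.metrics import f1_score

    y_true = [0, 1, 2, 2, 1, 0]   # toy ground-truth class ids
    y_pred = [0, 1, 2, 1, 1, 0]   # toy predictions
    print('F1 score (micro): ', f1_score(y_true, y_pred, average='micro'))
    print('F1 score (macro): ', f1_score(y_true, y_pred, average='macro'))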

@@ -9,6 +9,7 @@ def read_csv_nn(scicite_dir=None):
     train_file_path = 'data/tsv/train.tsv'
     test_file_path = 'data/tsv/test.tsv'
     train_raw = read_csv_file(train_file_path, '\t')
+    test_raw = read_csv_file(test_file_path, '\t')
     features = [x.features for x in train_raw]
     features_unique = list(set(chain.from_iterable(features)))
@@ -44,7 +45,13 @@ def read_csv_nn(scicite_dir=None):
             if f in features[i]:
                 X_test[i,j] = 1
-    return X_train, y_train, X_test
+    y_test_raw = np.array([x.true_label for x in test_raw])
+    y_test = np.zeros((nobs, y_dim))
+    for j in range(y_dim):
+        y_test[:, j] = y_test_raw == y_unique[j]
+    return X_train, y_train, X_test, y_test
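The y_test block added above one-hot encodes the raw test labels against y_unique. A minimal standalone sketch with toy labels standing in for test_raw and y_unique (the label order is an assumption for illustration):

    import numpy as np

    y_unique = np.array(['background', 'method', 'result'])   # assumed label order
    y_test_raw = np.array(['method', 'result', 'background', 'method'])
    nobs, y_dim = len(y_test_raw), len(y_unique)
    y_test = np.zeros((nobs, y_dim))
    for j in range(y_dim):
        y_test[:, j] = y_test_raw == y_unique[j]
    # y_test rows: [0,1,0], [0,0,1], [1,0,0], [0,1,0]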
