parent 281205b0df
commit a1ac7e6cfa
@@ -0,0 +1,62 @@
import os
from classifier.linear_model import MultiClassPerceptron
from sklearn.metrics import confusion_matrix as cm
from utils.csv import read_csv_file
from eval.metrics import f1_score
import utils.constants as const
import pandas as pd
import numpy as np


train_file_path = '/Users/iriley/code/citation-analysis/data/tsv/train.tsv'
dev_file_path = '/Users/iriley/code/citation-analysis/data/tsv/dev.tsv'


# Read the training data set
X_train_inst = read_csv_file(train_file_path, '\t')

# Set of labels observed in the training data
labels = set(inst.true_label for inst in X_train_inst)

# Read the dev data set
X_dev_inst = read_csv_file(dev_file_path, '\t')

# Number of training iterations
epochs = 50

# Create the MultiClassPerceptron classifier object
clf = MultiClassPerceptron(epochs=epochs, learning_rate=0.5, random_state=101)

# Train the model
clf.fit(X_train=X_train_inst, labels=list(labels))

# Predict labels and per-class scores on the dev set
y_pred = clf.predict(X_dev_inst)
y_scores = np.array(clf.get_class_scores(X_dev_inst))

y_true = [inst.true_label for inst in X_dev_inst]

# Encode the string labels as integers
labeldict = {'background': 0, 'method': 1, 'result': 2}
y_pred = np.array([labeldict[x] for x in y_pred])
y_true = np.array([labeldict[x] for x in y_true])

# Confusion matrix on the dev set (rows = true label, columns = predicted label)
conmat = cm(y_true, y_pred)
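# One convenient way to read the confusion matrix, shown here as a sketch: wrap it
# in a labelled DataFrame (label_names and conmat_df are illustrative names; the
# ordering assumes the integer codes defined in labeldict above).
label_names = sorted(labeldict, key=labeldict.get)
conmat_df = pd.DataFrame(conmat, index=label_names, columns=label_names)
print(conmat_df)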

# Collect predictions, gold labels, and per-class scores for error analysis
df = pd.DataFrame()
df['pred'] = y_pred
df['true'] = y_true
df['correct'] = y_pred == y_true
df['score0'] = np.round(y_scores[:, 0], 3)
df['score1'] = np.round(y_scores[:, 1], 3)
df['score2'] = np.round(y_scores[:, 2], 3)

df.to_csv('/Users/iriley/code/machine_learning/lab2020/preds_perceptron.csv', index=False)
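
# A quick per-class accuracy breakdown from the same frame (an illustrative aside;
# it only uses the columns defined above).
print(df.groupby('true')['correct'].mean())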

## Model Evaluation
#f1_score_micro = f1_score(y_true, y_pred, labels, const.AVG_MICRO)
#f1_score_macro = f1_score(y_true, y_pred, labels, const.AVG_MACRO)
#f1_score_none = f1_score(y_true, y_pred, labels, None)

## Print F1 Score
#for result in f1_score_micro + f1_score_macro + f1_score_none:
#    result.print_result()
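
# The custom f1_score above is left commented out; as a rough sketch, comparable
# micro / macro F1 scores can be computed from the integer-encoded labels with
# sklearn (sk_f1 is just a local alias, not part of the project code).
from sklearn.metrics import f1_score as sk_f1
print('F1 (micro):', sk_f1(y_true, y_pred, average='micro'))
print('F1 (macro):', sk_f1(y_true, y_pred, average='macro'))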
@@ -0,0 +1,33 @@

import numpy as np
from itertools import chain
from utils.csv import read_csv_file


def read_csv_nn_dev(scicite_dir=None):
    # scicite_dir is currently unused; the dev path is hard-coded relative to the repo root
    dev_file_path = 'data/tsv/dev.tsv'
    dev_raw = read_csv_file(dev_file_path, '\t')

    # Binary bag-of-features matrix: one row per instance, one column per unique feature
    features = [x.features for x in dev_raw]
    features_unique = list(set(chain.from_iterable(features)))
    nobs = len(features)
    nfeats = len(features_unique)

    X_dev = np.zeros((nobs, nfeats))

    for j in range(nfeats):
        f = features_unique[j]
        for i in range(nobs):
            if f in features[i]:
                X_dev[i, j] = 1

    # One-hot encode the gold labels, with columns in sorted label order
    y_dev_raw = np.array([x.true_label for x in dev_raw])
    y_unique = sorted(set(y_dev_raw))
    y_dim = len(y_unique)
    y_dev = np.zeros((nobs, y_dim))

    for j in range(y_dim):
        y_dev[:, j] = y_dev_raw == y_unique[j]

    return X_dev, y_dev
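

# A minimal usage sketch, assuming the module is run from the repository root so
# that the relative path 'data/tsv/dev.tsv' resolves.
if __name__ == '__main__':
    X_dev, y_dev = read_csv_nn_dev()
    print('X_dev shape:', X_dev.shape)   # (n_instances, n_unique_features)
    print('y_dev shape:', y_dev.shape)   # (n_instances, n_classes)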