finished basic ff model and torch data reader

isaac
Isaac Riley 6 years ago
parent 18b7847bcf
commit e9b1f31c49

@ -1,9 +1,10 @@
from utils.nn_reader import read_csv """
Simple feed-forward neural network in PyTorch for baseline results on Scicite data.
Date: July 5th, 2020
"""
import torch import torch
from utils.nn_reader import read_csv_nn
class Feedforward(torch.nn.Module): class Feedforward(torch.nn.Module):
@ -26,63 +27,41 @@ class Feedforward(torch.nn.Module):
return output return output
"""
from sklearn.datasets import make_blobs
def blob_label(y, label, loc): # assign labels
target = numpy.copy(y)
for l in loc:
target[y == l] = label
return target
X_train, y_train = make_blobs(n_samples=40, n_features=2, cluster_std=1.5, shuffle=True)
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(blob_label(y_train, 0, [0]))
y_train = torch.FloatTensor(blob_label(y_train, 1, [1,2,3]))
x_test, y_test = make_blobs(n_samples=10, n_features=2, cluster_std=1.5, shuffle=True)
x_test = torch.FloatTensor(x_test)
y_test = torch.FloatTensor(blob_label(y_test, 0, [0]))
y_test = torch.FloatTensor(blob_label(y_test, 1, [1,2,3]))
"""
X_train = torch.as_tensor(X_train)
X_test = torch.as_tensor(X_test)
y_train = torch.as_tensor(y_train)
model = Feedforward(28, 9, 3)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
if __name__=='__main__':
X_train, y_train, X_test = read_csv_nn()
model.eval() X_train = torch.FloatTensor(X_train)
y_pred = model(X_train) X_test = torch.FloatTensor(X_test)
y_pred = torch.Tensor([list(x).index(x.max()) for x in y_pred]) y_train_ = torch.FloatTensor(y_train)
y_pred = y_train = torch.max(torch.FloatTensor(y_train_),1)[1]
before_train = criterion(y_train, y_pred)
print('Test loss before training' , before_train.item())
model = Feedforward(28, 9, 3)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)
model.eval()
model.train()
epoch = 20
for epoch in range(epoch):
optimizer.zero_grad()
# Forward pass
y_pred = model(X_train) y_pred = model(X_train)
# Compute Loss before_train = criterion(y_pred, y_train)
loss = criterion(y_pred.squeeze(), y_train) print('Test loss before training' , before_train.item())
print('Epoch {}: train loss: {}'.format(epoch, loss.item())) model.train()
# Backward pass epoch = 2000
loss.backward() for epoch in range(epoch):
optimizer.step() optimizer.zero_grad()
# forward pass
y_pred = model(X_train)
model.eval() loss = criterion(y_pred, y_train)
y_pred = model(X_test)
after_train = criterion(y_pred.squeeze(), y_test) print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
print('Test loss after Training' , after_train.item()) # backward pass
loss.backward()
optimizer.step()
model.eval()
y_pred = model(X_train)
after_train = criterion(y_pred, y_train)
print('Training loss after training' , after_train.item())

@ -1,43 +1,49 @@
import numpy as np import numpy as np
from iterables import chain from itertools import chain
from utils.csv import read_csv_file from utils.csv import read_csv_file
train_file_path = 'data/tsv/train.tsv' # TODO: clean up, transform into class, allow for command-line arguments
test_file_path = 'data/tsv/test.tsv'
train_raw = read_csv_file(train_file_path, '\t') def read_csv_nn(scicite_dir=None):
features = [x.features for x in train_raw] train_file_path = 'data/tsv/train.tsv'
features_unique = list(set(chain.from_iterable(features))) test_file_path = 'data/tsv/test.tsv'
nobs = len(features) train_raw = read_csv_file(train_file_path, '\t')
nfeats = len(features_unique)
features = [x.features for x in train_raw]
X_train = np.zeros((nobs, nfeats)) features_unique = list(set(chain.from_iterable(features)))
nobs = len(features)
for j in range(nfeats): nfeats = len(features_unique)
f = features_unique[j]
for i in range(nobs): X_train = np.zeros((nobs, nfeats))
if f in features[i]:
X_train[i,j] = 1 for j in range(nfeats):
f = features_unique[j]
y_train_raw = np.array([x.true_label for x in train_raw]) for i in range(nobs):
y_unique = sorted(list(set(y_train_raw))) if f in features[i]:
y_dim = len(y_unique) X_train[i,j] = 1
y_train = np.zeros((nobs,y_dim))
y_train_raw = np.array([x.true_label for x in train_raw])
for j in range(y_dim): y_unique = sorted(list(set(y_train_raw)))
y_train[:,j] = y_raw == y_unique[j] y_dim = len(y_unique)
y_train = np.zeros((nobs,y_dim))
test_raw = read_csv_file(test_file_path, '\t')
features = [x.features for x in test_raw] for j in range(y_dim):
#features_unique = list(set(chain.from_iterable(features))) y_train[:,j] = y_train_raw == y_unique[j]
nobs = len(features)
nfeats = len(features_unique) test_raw = read_csv_file(test_file_path, '\t')
features = [x.features for x in test_raw]
X_test = np.zeros((nobs, nfeats)) #features_unique = list(set(chain.from_iterable(features)))
for j in range(nfeats): nobs = len(features)
f = features_unique[j] nfeats = len(features_unique)
for i in range(nobs):
if f in features[i]: X_test = np.zeros((nobs, nfeats))
X_test[i,j] = 1 for j in range(nfeats):
f = features_unique[j]
for i in range(nobs):
if f in features[i]:
X_test[i,j] = 1
return X_train, y_train, X_test

Loading…
Cancel
Save