parent
18b7847bcf
commit
e9b1f31c49
@ -1,43 +1,49 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from iterables import chain
|
from itertools import chain
|
||||||
from utils.csv import read_csv_file
|
from utils.csv import read_csv_file
|
||||||
|
|
||||||
# TODO: clean up, transform into class, allow for command-line arguments


def _one_hot_rows(rows, column_of):
    """Encode per-row feature collections as a dense 0/1 matrix.

    Args:
        rows: list with one iterable of features per observation.
        column_of: dict mapping each known feature to its column index.

    Returns:
        A float array of shape ``(len(rows), len(column_of))`` where entry
        ``[i, j]`` is 1 when the feature owning column ``j`` occurs in
        ``rows[i]`` and 0 otherwise.
    """
    encoded = np.zeros((len(rows), len(column_of)))
    for i, row in enumerate(rows):
        for feat in row:
            j = column_of.get(feat)
            # Features missing from the vocabulary have no column; skip them.
            if j is not None:
                encoded[i, j] = 1
    return encoded


def read_csv_nn(scicite_dir=None):
    """Load the train/test TSV files and build one-hot matrices from them.

    The feature vocabulary and the label set are derived from the training
    file only; test rows are encoded against that same vocabulary, so
    features that appear only in the test file are dropped.

    Args:
        scicite_dir: accepted for interface compatibility but not read;
            both input paths are hard-coded below (see the TODO above).

    Returns:
        A tuple ``(X_train, y_train, X_test)``:
        ``X_train`` is ``(n_train, n_features)`` with 0/1 feature indicators,
        ``y_train`` is ``(n_train, n_classes)`` with one column per distinct
        training label in sorted order, and ``X_test`` is
        ``(n_test, n_features)`` using the training feature columns.
    """
    train_file_path = 'data/tsv/train.tsv'
    test_file_path = 'data/tsv/test.tsv'

    train_raw = read_csv_file(train_file_path, '\t')
    train_features = [x.features for x in train_raw]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # column layout is reproducible between runs (a plain set is not).
    features_unique = list(dict.fromkeys(chain.from_iterable(train_features)))
    column_of = {feat: j for j, feat in enumerate(features_unique)}

    X_train = _one_hot_rows(train_features, column_of)

    # One indicator column per distinct label, columns in sorted label order.
    y_train_raw = np.array([x.true_label for x in train_raw])
    y_unique = sorted(set(y_train_raw))
    y_train = np.zeros((len(train_features), len(y_unique)))
    for j, label in enumerate(y_unique):
        y_train[:, j] = y_train_raw == label

    # The test set reuses the training vocabulary so columns line up.
    test_raw = read_csv_file(test_file_path, '\t')
    test_features = [x.features for x in test_raw]
    X_test = _one_hot_rows(test_features, column_of)

    return X_train, y_train, X_test
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in new issue