From a3a3043bbb5d19b9c2c7c8211c92d4e55b4f8b12 Mon Sep 17 00:00:00 2001
From: Isaac Riley
Date: Mon, 4 May 2020 01:10:20 +0200
Subject: [PATCH 1/2] added micro-f1 code and assert statement

---
 eval/metrics.py         | 6 ++----
 testing/eval_testing.py | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/eval/metrics.py b/eval/metrics.py
index feacd0a..1e74fb2 100644
--- a/eval/metrics.py
+++ b/eval/metrics.py
@@ -17,7 +17,7 @@ def f1_score(y_true, y_pred, labels, average):
     :return: returns a list of Result class objects. Use :func:`~eval.metrics.Result.print_result`
              to print F1 Score on the Console
     """
-
+    assert len(list(y_true)) == len(list(y_pred))
     if average is None or average == const.AVG_MACRO:
         pr_list = get_precision_recall(y_true, y_pred, labels)
         f1_score_list = []
@@ -36,9 +36,7 @@ def f1_score(y_true, y_pred, labels, average):
         return [Result(None, None, average, None, f1_sum / len(pr_list))]
 
     elif average == const.AVG_MICRO:
-        print('test test test')
-        print("another test comment")
-        pass
+        return [Result(None, None, average, None, sum(a == b for a, b in zip(y_true, y_pred)) / len(list(y_true)))]
 
     return None
 
diff --git a/testing/eval_testing.py b/testing/eval_testing.py
index 89782fa..42bb5e1 100644
--- a/testing/eval_testing.py
+++ b/testing/eval_testing.py
@@ -4,7 +4,7 @@ import utils.constants as const
 y_true = ['positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative']
 y_pred = ['positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'negative', 'negative']
 
-result_list = f1_score(y_true, y_pred, ['positive', 'negative'], const.AVG_MICRO)
+result_list = f1_score(y_true, y_pred, ['positive', 'negative'], None)
 
 for result in result_list:
     result.print_result()
\ No newline at end of file

From 5d0ccaf111cbb12c4c6960181403a5b5ce07515e Mon Sep 17 00:00:00 2001
From: Isaac Riley
Date: Mon, 11 May 2020 02:49:55 +0200
Subject: [PATCH 2/2] created basic skeleton for perceptron (single & multi)

---
 classifier/linear_model.py | 85 ++++++++++++++++++++++++++++++++++----
 1 file changed, 77 insertions(+), 8 deletions(-)

diff --git a/classifier/linear_model.py b/classifier/linear_model.py
index 99fd07c..d0d538b 100644
--- a/classifier/linear_model.py
+++ b/classifier/linear_model.py
@@ -1,17 +1,86 @@
+# initialization procedure: https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78
+from math import exp, log, sqrt
+
+import numpy as np
+
 class Perceptron:
 
-    def __init__(self, label):
+    def __init__(self, label, input_dim, output_dim, step_size, num_classes=1):
         self.classifier_label = label
-        pass
+        self.input_len = input_dim
+        self.output_len = output_dim
+        self.sigmoid = lambda z: 1 / (1 + np.exp(-z))
+        self.num_classes = num_classes
+        self.multi_class = num_classes > 1
+        self.vexp = np.vectorize(exp)
+
+    def fit(self, X, y, weights=None, step_size=0.01, batch_size=10):
+        """
+        initializes training data and hyperparameters
+        """
+        # init weights and step_size
+        assert X.shape[0] == y.shape[0]
+        self.train_nobs = X.shape[0]
+        if weights is not None:
+            self.W = weights
+        else:
+            # fan-in scaled random initialization (see link above)
+            self.W = np.random.randn(self.input_len, self.num_classes) * sqrt(2 / (1 + self.input_len))
+        self.step_size = step_size
+        self.batch_size = batch_size
+        # shuffle the training data once before batching
+        self.shuffler = np.random.permutation(self.train_nobs)
+        self.X = X[self.shuffler]
+        self.y = y[self.shuffler]
 
-    def fit(self, X, y, weights=None):
-        pass
 
     def predict(self, X):
-        pass
+        """
+        takes a test set and returns predictions
+        """
+        if self.multi_class:
+            return self.softmax(X.dot(self.W))
+        else:
+            return self.sigmoid(X.dot(self.W))
+
+    def train(self, num_epochs=1, cost_funct='cross_ent'):
+        """
+        implements the backpropagation (gradient descent) update over mini-batches
+        """
+        batches = [(n, n + self.batch_size) for n in range(0, self.train_nobs, self.batch_size)]
+        for a, b in batches:
+            XW = self.X[a:b].dot(self.W)
+            preds = self.predict(self.X[a:b])
+            # cost = self.cost(self.y[a:b], preds, funct=cost_funct)
+            cost_deriv = preds - self.y[a:b]
+            if self.multi_class:
+                act_deriv = self.soft_deriv(XW)
+            else:
+                act_deriv = self.sigmoid(XW) * (1 - self.sigmoid(XW))
+            update = self.X[a:b].T.dot(cost_deriv * act_deriv)
+            self.W = self.W - self.step_size * update
+
+    def softmax(self, vector):
+        denom = np.sum(self.vexp(vector))
+        return self.vexp(vector) / denom
+
+    def cost(self, y, yhat, funct='cross_ent'):
+        if funct == 'cross_ent':
+            # cross-entropy: negative log-likelihood of the true labels
+            return -np.sum(np.vectorize(log)(yhat) * y)
+
+    def soft_deriv(self, inputs):
+        # Jacobian-style derivative of the activation with respect to its inputs
+        size = max(*inputs.shape)
+        deriv = np.zeros((size, size))
+        for i in range(size):
+            for j in range(size):
+                if i == j:
+                    deriv[i, j] = self.sigmoid(inputs[j]) * (1 - self.sigmoid(inputs[i]))
+                else:
+                    deriv[i, j] = -self.sigmoid(inputs[j]) * self.sigmoid(inputs[i])
+        return deriv
 
 
-class MultiClassPerceptron:
+#class MultiClassPerceptron(Perceptron):
 
-    def __init__(self):
-        pass
+#    def __init__(self):
+#        pass
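
A note on the AVG_MICRO branch in the first patch: with single-label data, micro-averaged F1 pools true positives, false positives and false negatives across all labels, and the pooled precision and recall both reduce to plain accuracy, so the fraction of matching predictions is the quantity to wrap in a Result. The standalone sketch below is only an illustration (micro_f1 and the toy lists are hypothetical, not part of either patch) and checks that equivalence from the pooled per-class counts.

    # standalone illustration, not part of the patches
    def micro_f1(y_true, y_pred, labels):
        pairs = list(zip(y_true, y_pred))
        tp = sum(t == p for t, p in pairs)                             # pooled true positives
        fp = sum(p == l and t != l for l in labels for t, p in pairs)  # pooled false positives
        fn = sum(t == l and p != l for l in labels for t, p in pairs)  # pooled false negatives
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        return 2 * precision * recall / (precision + recall)

    y_true = ['positive', 'positive', 'negative', 'negative']
    y_pred = ['positive', 'negative', 'negative', 'positive']
    print(micro_f1(y_true, y_pred, ['positive', 'negative']))  # 0.5, equal to accuracy (2 of 4 correct)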
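
For the perceptron skeleton in the second patch, a rough smoke test of the binary path could look like the sketch below. It assumes the patched classifier/linear_model.py imports cleanly as classifier.linear_model; the toy data, shapes and hyperparameter values are illustrative only, not part of the patch.

    # hypothetical usage sketch for the patched Perceptron (binary case)
    import numpy as np
    from classifier.linear_model import Perceptron

    X = np.random.randn(100, 5)                     # 100 samples, 5 features
    y = (X[:, 0] > 0).astype(float).reshape(-1, 1)  # toy binary target
    clf = Perceptron('demo', input_dim=5, output_dim=1, step_size=0.01)
    clf.fit(X, y, step_size=0.01, batch_size=10)    # stores shuffled data and initial weights
    clf.train(num_epochs=1)                         # one pass of mini-batch updates
    scores = clf.predict(X)                         # sigmoid scores in (0, 1)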