Perceptron and Multi-Class Perceptron done

Pavan Mandava 6 years ago
parent c915db6fc5
commit 89f6cfdf88

classifier/linear_model.py
@@ -1,86 +1,198 @@
# initialization procedure: https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78
from utils.models import DataInstance
from feature_extraction.features import FEATURE_LIST, THETA_BIAS_FEATURE
from collections import OrderedDict
import random
class Perceptron:
    """
    The Perceptron is an algorithm for supervised learning of binary classifiers:
    it decides whether or not an input (a set of features) belongs to a specific class.
    It is a linear classifier, which makes predictions by combining weights with the feature vector.
    """

    def __init__(self, label: str, weights: dict, theta_bias: float):
        """
        :type label: str
        :type weights: dict
        :type theta_bias: float
        :param label: label for the Perceptron classifier (useful when dealing with the Multi-Class Perceptron)
        :param weights: dictionary mapping each feature name to its weight (a random number)
        :param theta_bias: value of the theta bias variable, i.e. the threshold weight
        """
        self.label = label
        self.weights = weights
        self.theta_bias = theta_bias

    def score(self, features: list):
        """
        Takes a list of features and computes a score by summing the weights
        that correspond to those features.
        :type features: list
        :param features: list of features from a DataInstance
        :return: the computed score
        """
        score_val = 0
        for feature in features:
            score_val += self.weights[feature]
        return score_val
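
    # A quick sketch of score() with made-up weights ('USE' and 'MATH' are real
    # entries in FEATURE_LIST; the label and weight values here are illustrative):
    # the score is just the sum of the active features' weights, i.e. a dot
    # product with a binary feature vector.
    #
    #   p = Perceptron('claim', {'THETA_BIAS': -0.5, 'USE': 0.3, 'MATH': -0.2}, -0.5)
    #   p.score(['THETA_BIAS', 'USE'])   # -0.5 + 0.3 = -0.2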
    def update_weights(self, features: list, learning_rate: float = 1, penalize: bool = None, reward: bool = None):
        """
        Updates the weights during training of the Perceptron classifier.
        Takes a list of features and updates (increases or decreases) the weight
        of each individual feature based on the learning rate parameter.
        :param features: list of features from the input DataInstance
        :param learning_rate: default is 1
        :param penalize: if True, decreases the weight of each feature; default is None
        :param reward: if True, increases the weight of each feature; default is None
        - If both penalize and reward are None, the weights are not updated.
        - If both penalize and reward are True with a learning rate of 1,
          the two updates cancel out and the weights remain the same.
        """
        for feature in features:
            # update in place so that penalize and reward compose: if both are
            # True, the subtraction and addition cancel, as documented above
            if penalize:
                self.weights[feature] -= learning_rate
            if reward:
                self.weights[feature] += learning_rate
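
    # Sketch of the update rule, continuing the illustrative weights above:
    # penalize subtracts the learning rate from each listed feature's weight,
    # and reward adds it back.
    #
    #   p.update_weights(['USE'], learning_rate=1, penalize=True)   # 0.3 -> -0.7
    #   p.update_weights(['USE'], learning_rate=1, reward=True)     # -0.7 -> 0.3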

class MultiClassPerceptron:
    """
    The Perceptron is a binary classifier: it can only separate two classes.
    A Multi-Class Perceptron can be used where one of several labels must be
    assigned to each data instance. It creates one Perceptron classifier per label;
    during training it takes the score for each label (from that label's Perceptron
    classifier), and the label with the highest score is the predicted label.
    If the predicted label differs from the true label of the data instance,
    the model updates the weights as follows:
    - decrease the weights of the Perceptron classifier for the predicted label (penalize)
    - increase the weights of the Perceptron classifier for the true label (reward)
    The model also shuffles the training data after each epoch.
    """

    def __init__(self, epochs: int = 2000, learning_rate: float = 1):
        """
        :type epochs: int
        :type learning_rate: float
        :param epochs: number of training iterations
        :param learning_rate: learning rate for updating weights; default is 1
        """
        self.perceptron_dict = OrderedDict()  # key: label, value: the Perceptron object for that label
        self.epochs = epochs
        self.learning_rate = learning_rate
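
    # The training rule from the class docstring in miniature (labels and scores
    # here are made up): prediction is an argmax over the per-label scores, and a
    # wrong prediction triggers one penalize update and one reward update.
    #
    #   scores = {'claim': 0.8, 'citation': 0.1}    # per-label Perceptron scores
    #   predicted = max(scores, key=scores.get)     # -> 'claim'
    #   # if the true label is 'citation': penalize 'claim', reward 'citation'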
    def fit(self, X_train: list, labels: list):
        """
        Takes the training data and labels as parameters and trains the model.
        :type X_train: list[DataInstance]
        :type labels: list[str]
        :param X_train: list of training DataInstances
        :param labels: list of classes
        """
        # Check whether the labels parameter is empty and raise an Exception
        if labels is None or len(labels) <= 0:
            raise Exception('The labels parameter must contain at least one label')
        # Check whether the training data is empty and raise an Exception
        if X_train is None or len(X_train) <= 0:
            raise Exception('Training data cannot be empty')
        # Check the data type of the training instances; either failed check
        # should raise, hence `or` rather than `and`
        if not isinstance(X_train, list) or not isinstance(X_train[0], DataInstance):
            raise Exception('Training data must be a list of DataInstance (model)')
        train_len = len(X_train)
        # Dictionary storing label -> Perceptron() objects; create a new Perceptron object for each label
        for label in labels:
            self.perceptron_dict[label] = Perceptron(label, get_sample_weights_with_features(-0.5), -0.5)
        # Training iterations
        for epoch in range(self.epochs):
            # pick a random data instance (the upper bound is train_len - 1,
            # since randint is inclusive on both ends)
            rand_num = random.randint(0, train_len - 1)
            inst = X_train[rand_num]
            perceptron_scores = []  # list storing the perceptron score for each label
            for label, perceptron in self.perceptron_dict.items():
                perceptron_scores.append(perceptron.score(inst.features))
            # find the max score and the label it corresponds to
            max_score = max(perceptron_scores)
            label_max_score = labels[perceptron_scores.index(max_score)]
            # if the label with the max score differs from this data instance's true label,
            # decrease the weights (penalize) for the Perceptron of the predicted label
            # and increase the weights (reward) for the Perceptron of the true label
            if inst.true_label != label_max_score:
                self.perceptron_dict[label_max_score].update_weights(inst.features, self.learning_rate, penalize=True)
                self.perceptron_dict[inst.true_label].update_weights(inst.features, self.learning_rate, reward=True)
            # it's important to shuffle the data during every epoch
            random.shuffle(X_train)
    def predict(self, X_test: list):
        """
        Takes the test instances as parameters and assigns each a predicted label:
        the score is taken from each Perceptron classifier, and the label with the
        highest score is the predicted label.
        :param X_test: list of test DataInstances
        :return: list of predicted labels
        """
        if X_test is None or len(X_test) <= 0:
            raise Exception('Testing data cannot be empty')
        y_test = []
        labels = list(self.perceptron_dict.keys())
        for test_inst in X_test:
            perceptron_scores = []  # list storing the perceptron score for each label
            for label in labels:
                perceptron_scores.append(self.perceptron_dict[label].score(test_inst.features))
            # find the max score from the list of scores and its corresponding label
            max_score = max(perceptron_scores)
            label_max_score = labels[perceptron_scores.index(max_score)]
            y_test.append(label_max_score)
        return y_test

def get_sample_weights_with_features(theta_bias: float = None):
    """
    Creates a dictionary with each feature as a key and a random floating-point
    number (the feature weight) as its value. The weight of each feature is a
    float between -1 and 1.
    :param theta_bias: initial weight for the theta bias feature; default is None
    :return: a dictionary of random weights for each feature
    """
    weights = {THETA_BIAS_FEATURE: theta_bias}
    for feature in FEATURE_LIST:
        weights[feature] = round(random.uniform(-1.0, 1.0), 4)
    return weights
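
# End-to-end usage sketch. It assumes DataInstance(r_id, text, true_label) as
# defined in utils.models; the texts and labels below are made up for illustration.
#
#   from utils.models import DataInstance
#   train = [DataInstance(1, 'we compare our approach with ...', 'claim'),
#            DataInstance(2, 'as shown in [3], ...', 'citation')]
#   clf = MultiClassPerceptron(epochs=500, learning_rate=1)
#   clf.fit(train, ['claim', 'citation'])
#   clf.predict([DataInstance(3, 'we compare two methods', None)])  # e.g. ['claim']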

feature_extraction/features.py
@@ -6,6 +6,9 @@ FEATURE_LIST = ['COMPARE', 'CONTRAST', 'RESULT', 'INCREASE', 'CHANGE', 'USE', 'P
                'APPROACH', 'PUBLIC', 'BEFORE', 'BETTER_SOLUTION', 'PROFESSIONALS', 'MEDICINE', 'MATH', 'CITATION',
                'ACRONYM', 'CONTAINS_YEAR', 'SEQUENCE', 'REFERENCE', 'PERCENTAGE', 'URL']
""" Feature Name for Theta Bias -- need to add it to the list of features for all data instances """
THETA_BIAS_FEATURE = 'THETA_BIAS'
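
# Why the bias is a feature: THETA_BIAS is prepended to every instance's feature
# list (see extract_features_from_text below), so a Perceptron's score() already
# includes its bias weight and computes w . x + theta without a separate term.
# A minimal sketch with illustrative values:
#
#   weights = {'THETA_BIAS': -0.5, 'MATH': 0.3}
#   sum(weights[f] for f in ['THETA_BIAS', 'MATH'])   # -0.5 + 0.3 = -0.2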
def extract_features_from_text(text: str):
"""
@@ -18,12 +21,14 @@ def extract_features_from_text(text: str):
    # ALL_LEXICONS
    lexicon_dict = lexicons.ALL_LEXICONS
    # Initialize the feature list with the theta bias feature; this feature must be added to all data instances
    text_feature_list = [THETA_BIAS_FEATURE]
    # Iterate through the list of features and get the list of words from the lexicon dictionary;
    # for each word in the word list, check whether it appears in the input text and add it to the text feature list
    for feature in FEATURE_LIST:
        # If the feature is a Regex Pattern Match, get the pattern from :`~utils.constants.REGEX_CONSTANTS`
        # and match it against the input text
        if feature in REGEX_CONSTANTS:
            pattern = REGEX_CONSTANTS[feature]
@@ -31,7 +36,7 @@ def extract_features_from_text(text: str):
            text_feature_list.append(feature)
            continue
        # If the feature is not a Regex Pattern Match, get the list of dictionary words from the lexicon dictionary
        word_list = lexicon_dict[feature]
        for word in word_list:
            if word in text.lower():

@@ -0,0 +1,3 @@
from classifier.linear_model import get_sample_weights_with_features
print(get_sample_weights_with_features())
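# With the default theta_bias=None, the printed dict maps THETA_BIAS to None and
# every feature in FEATURE_LIST to a random float in [-1.0, 1.0] rounded to four
# decimal places, e.g. (values will differ from run to run):
#   {'THETA_BIAS': None, 'COMPARE': 0.4132, 'CONTRAST': -0.7211, ...}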

utils/models.py
@@ -11,6 +11,7 @@ class DataInstance:
        self.did = r_id
        self.text = text
        self.true_label = true_label
        self.predicted_label = None
        self.features = extract_features_from_text(text)

    def print(self):
