parent
c915db6fc5
commit
89f6cfdf88
@@ -1,86 +1,198 @@

# ===== new version (the +1,198 side of the hunk) =====

from utils.models import DataInstance
from feature_extraction.features import FEATURE_LIST, THETA_BIAS_FEATURE
from collections import OrderedDict
import random


class Perceptron:
    """
    The Perceptron is an algorithm for supervised learning of binary classifiers:
    it decides whether or not an input (a set of features) belongs to a specific class.
    It is a linear classifier, making its predictions by combining weights with the
    feature vector.
    """

    def __init__(self, label: str, weights: dict, theta_bias: float):
        """
        :type label: str
        :type weights: dict
        :type theta_bias: float

        :param label: label for the Perceptron classifier (useful when dealing with a multi-class Perceptron)
        :param weights: dictionary mapping feature names to feature weights (random numbers)
        :param theta_bias: value of the theta bias variable, i.e. the threshold weight
        """
        self.label = label
        self.weights = weights
        self.theta_bias = theta_bias

    def score(self, features: list):
        """
        Takes a list of features and computes a score by summing the weights
        that correspond to those features.

        :type features: list

        :param features: list of features from a DataInstance
        :return: the computed score
        """
        score_val = 0
        for feature in features:
            score_val += self.weights[feature]

        return score_val

    def update_weights(self, features: list, learning_rate: float = 1, penalize: bool = None, reward: bool = None):
        """
        Updates weights during training of the Perceptron classifier.
        Takes a list of features and updates (increases or decreases) the
        weight of each individual feature based on the learning rate.

        :param features: list of features from the input DataInstance
        :param learning_rate: default is 1
        :param penalize: if True, decreases the weight of each feature. Default is None
        :param reward: if True, increases the weight of each feature. Default is None

        - If both penalize and reward are None, the weights are not updated.
        - If both penalize and reward are True, the penalty and the reward
          cancel out and the weights remain the same.
        """
        for feature in features:
            feature_weight = self.weights[feature]
            if penalize:
                self.weights[feature] = feature_weight - (learning_rate * 1)
            if reward:
                self.weights[feature] = feature_weight + (learning_rate * 1)
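
# A minimal usage sketch of the Perceptron above (not part of the original
# module; the feature names and weight values are hypothetical):
#
#   p = Perceptron('spam', {'contains_link': 0.4, 'all_caps': -0.1}, theta_bias=-0.5)
#   p.score(['contains_link', 'all_caps'])            # 0.4 + (-0.1) = 0.3
#   p.update_weights(['contains_link'], 1, penalize=True)
#   p.score(['contains_link', 'all_caps'])            # now -0.6 + (-0.1) = -0.7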


class MultiClassPerceptron:
    """
    The Perceptron is a binary classifier: it can only separate two classes.
    Where multiple labels can be assigned to data instances, a multi-class
    Perceptron can be used instead.

    The multi-class Perceptron creates one Perceptron classifier per label. During
    training it takes the score for each label (from that label's Perceptron
    classifier), and the label with the highest score is the predicted label.

    If the predicted label differs from the true label of a data instance,
    the model updates the weights as follows:
    - decrease the weights of the Perceptron classifier for the predicted label (penalize)
    - increase the weights of the Perceptron classifier for the true label (reward)

    The model also shuffles the training data after each epoch.
    """

    def __init__(self, epochs: int = 2000, learning_rate: float = 1):
        """
        :type epochs: int
        :type learning_rate: float

        :param epochs: number of training iterations
        :param learning_rate: learning rate for updating weights. Default is 1
        """
        self.perceptron_dict = OrderedDict()  # maps each label to the Perceptron object for that label
        self.epochs = epochs
        self.learning_rate = learning_rate

    def fit(self, X_train: list, labels: list):
        """
        Takes the training data and labels and trains the model.

        :type X_train: list[DataInstance]
        :type labels: list[str]

        :param X_train: list of training DataInstances
        :param labels: list of classes
        """
        # Check whether the labels parameter is empty and raise an exception
        if labels is None or len(labels) <= 0:
            raise Exception('The labels parameter must contain at least one label')

        # Check whether the training data is empty and raise an exception
        if X_train is None or len(X_train) <= 0:
            raise Exception('Training data can\'t be empty')

        # Check the data type of the training instances
        if not isinstance(X_train, list) or not isinstance(X_train[0], DataInstance):
            raise Exception('Training data must be a list of type DataInstance (model)')

        train_len = len(X_train)

        # Create a new Perceptron object for each label, stored as label -> Perceptron()
        for label in labels:
            self.perceptron_dict[label] = Perceptron(label, get_sample_weights_with_features(-0.5), -0.5)

        # Training iterations
        for epoch in range(self.epochs):

            # pick a random data instance from the training set
            # (randint is inclusive on both ends, hence train_len - 1)
            rand_num = random.randint(0, train_len - 1)
            inst = X_train[rand_num]

            perceptron_scores = []  # list storing the Perceptron score for each label
            for label, perceptron in self.perceptron_dict.items():
                perceptron_scores.append(perceptron.score(inst.features))

            # find the max score in the list of scores
            max_score = max(perceptron_scores)

            # find the label that corresponds to the max score
            label_max_score = labels[perceptron_scores.index(max_score)]

            # if the label with the max score differs from the true label of this
            # data instance, decrease the weights (penalize) for the Perceptron of
            # the predicted label and increase the weights (reward) for the
            # Perceptron of the true label
            if inst.true_label != label_max_score:
                # decrease weights
                self.perceptron_dict[label_max_score].update_weights(inst.features, self.learning_rate, penalize=True)
                # increase weights
                self.perceptron_dict[inst.true_label].update_weights(inst.features, self.learning_rate, reward=True)

            # it's important to shuffle the data on every epoch
            random.shuffle(X_train)
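
    # One illustrative training step (hypothetical numbers, not from the source):
    # suppose the scores are {'pos': 1.2, 'neg': 0.7} but the instance's true label
    # is 'neg'. The predicted label 'pos' is penalized and 'neg' is rewarded: with
    # learning_rate=1, every active feature weight in 'pos' drops by 1 and every
    # one in 'neg' rises by 1, so 'neg' scores higher on similar instances next time.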

    def predict(self, X_test: list):
        """
        Takes test instances and assigns each a predicted label.

        Takes the score from each Perceptron classifier; the label with the
        highest score is the predicted label.

        :param X_test: list of test data instances
        :return: list of predicted labels
        """
        if X_test is None or len(X_test) <= 0:
            raise Exception('Testing data cannot be empty')

        y_test = []
        labels = list(self.perceptron_dict.keys())
        for test_inst in X_test:
            perceptron_scores = []  # list storing the Perceptron score for each label
            for label in labels:
                perceptron_scores.append(self.perceptron_dict[label].score(test_inst.features))
            # find the max score in the list of scores
            max_score = max(perceptron_scores)
            label_max_score = labels[perceptron_scores.index(max_score)]
            y_test.append(label_max_score)

        return y_test
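
# A minimal end-to-end sketch (not part of the original module), assuming each
# DataInstance exposes `features` (a list of feature names) and `true_label`,
# which is what fit() and predict() above rely on:
#
#   model = MultiClassPerceptron(epochs=2000, learning_rate=1)
#   model.fit(train_instances, labels=['positive', 'negative', 'neutral'])
#   predictions = model.predict(test_instances)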


def get_sample_weights_with_features(theta_bias: float = None):
    """
    Creates a dictionary with each feature as a key and a random floating-point
    number (the feature weight) as its value.
    The weight of each feature is a floating-point number between -1 and 1.

    :return: a dictionary of random weights for each feature
    """
    weights = {THETA_BIAS_FEATURE: theta_bias}
    for feature in FEATURE_LIST:
        weights[feature] = round(random.uniform(-1.0, 1.0), 4)

    return weights


# ===== previous version (the -1,86 side of the hunk, removed) =====

# initialization procedure: https://towardsdatascience.com/weight-initialization-techniques-in-neural-networks-26c649eb3b78
from math import exp, log

import numpy as np


class Perceptron:

    def __init__(self, label, input_dim, output_dim, step_size, num_classes=1):
        self.classifier_label = label
        self.input_len = input_dim
        self.output_len = output_dim
        self.sigmoid = lambda z: 1 / (1 + np.exp(-z))  # np.exp so the lambda also works element-wise on arrays
        self.num_classes = num_classes
        self.multi_class = num_classes > 1
        self.vexp = np.vectorize(exp)

    def fit(self, X, y, weights=None, step_size=0.01, batch_size=10):
        """
        Initializes training data and hyperparameters.
        """
        # init weights and step size
        assert X.shape[0] == y.shape[0]
        self.train_nobs = X.shape[0]
        if weights is not None:
            self.W = weights
        else:
            # He-style initialization, scaled by the input dimension
            self.W = np.random.randn(self.input_len, self.num_classes) * np.sqrt(2 / (1 + self.input_len))
        self.step_size = step_size
        self.batch_size = batch_size
        # a random permutation of row indices, so X and y are shuffled together
        self.shuffler = np.random.permutation(self.train_nobs)
        self.X = X[self.shuffler]
        self.y = y[self.shuffler]

    def predict(self, X):
        """
        Takes a test set and returns predictions.
        """
        if self.multi_class:
            return self.softmax(X.dot(self.W))
        else:
            return self.sigmoid(X.dot(self.W))

    def train(self, num_epochs=1, cost_funct='cross_ent'):
        """
        Implements the backpropagation algorithm.
        """
        # batches of row indices over the training set
        batches = [(n, n + self.batch_size) for n in range(0, self.train_nobs, self.batch_size)]
        for epoch in range(num_epochs):
            for a, b in batches:
                XW = self.X[a:b].dot(self.W)
                preds = self.predict(self.X[a:b])
                # cost = self.cost(self.y[a:b], preds, funct=cost_funct)
                cost_deriv = preds - self.y[a:b]
                if self.multi_class:
                    act_deriv = self.soft_deriv(XW)
                else:
                    act_deriv = self.sigmoid(XW) * (1 - self.sigmoid(XW))
                # chain rule: gradient of the cost with respect to W
                update = self.X[a:b].T.dot(act_deriv * cost_deriv)
                self.W = self.W - self.step_size * update

    def softmax(self, vector):
        denom = np.sum(self.vexp(vector))
        return np.array(self.vexp(vector)) / denom

    def cost(self, y, yhat, funct='cross_ent'):
        if funct == 'cross_ent':
            # cross-entropy: negative sum of y * log(yhat)
            return -np.sum(np.vectorize(log)(yhat) * y)

    def soft_deriv(self, inputs):
        # matrix of pairwise derivatives of the activations
        size = max(*inputs.shape)
        deriv = np.zeros((size, size))
        for i in range(size):
            for j in range(size):
                if i == j:
                    deriv[i, j] = self.sigmoid(inputs[j]) * (1 - self.sigmoid(inputs[i]))
                else:
                    deriv[i, j] = -self.sigmoid(inputs[j]) * self.sigmoid(inputs[i])
        return deriv


# class MultiClassPerceptron(Perceptron):
#     def __init__(self):
#         pass
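
# A short aside on the weight initialization used by fit() above: drawing
# weights from N(0, 1) and scaling by sqrt(2 / (1 + fan_in)) keeps the variance
# of the pre-activations roughly stable as the input dimension grows. A minimal
# standalone sketch (hypothetical shapes, not from the original file):
#
#   import numpy as np
#   W = np.random.randn(4, 3) * np.sqrt(2 / (1 + 4))  # fan_in = 4, 3 classes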

@@ -0,0 +1,3 @@
from classifier.linear_model import get_sample_weights_with_features

print(get_sample_weights_with_features())
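
# The printed dictionary maps THETA_BIAS_FEATURE and every entry of FEATURE_LIST
# to a weight; the bias entry is None here because no theta_bias argument is passed.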