From 6575ba09523391e78ec2323e3c551dac3f77fc09 Mon Sep 17 00:00:00 2001
From: Pavan Mandava
Date: Mon, 18 May 2020 00:13:22 +0200
Subject: [PATCH] Random state still not working

---
 classifier/linear_model.py | 33 +++++++++++++++++++++------------
 testing/model_testing.py   |  2 +-
 utils/constants.py         |  3 +--
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/classifier/linear_model.py b/classifier/linear_model.py
index 3de308a..6e25f69 100644
--- a/classifier/linear_model.py
+++ b/classifier/linear_model.py
@@ -62,9 +62,9 @@ class Perceptron:
         for feature in features:
             feature_weight = self.weights[feature]
             if penalize:
-                self.weights[feature] = feature_weight - (learning_rate * 1)
+                self.weights[feature] = round(feature_weight - (learning_rate * 1), 5)
             if reward:
-                self.weights[feature] = feature_weight + (learning_rate * 1)
+                self.weights[feature] = round(feature_weight + (learning_rate * 1), 5)
 
 
 class MultiClassPerceptron:
@@ -85,7 +85,7 @@ class MultiClassPerceptron:
 
     """
 
-    def __init__(self, epochs: int = 5000, learning_rate: float = 1, random_state: int = 4):
+    def __init__(self, epochs: int = 5000, learning_rate: float = 1, random_state: int = 42):
         """
         :type epochs: int
         :type learning_rate: float
@@ -94,7 +94,7 @@ class MultiClassPerceptron:
         :param epochs: number of training iterations
         :param learning_rate: learning rate for updating weights, Default is 1
         :param random_state: random state for shuffling the data, useful for reproducing the results.
-                             Default is 4.
+                             Default is 42.
         """
         self.random_state = random_state
         self.perceptron_dict = OrderedDict()  # contains Key : label and value : Perceptron Object for label
@@ -128,10 +128,14 @@ class MultiClassPerceptron:
 
         # Dictionary for storing label->Perceptron() objects, Create a new Perceptron object for each label
         for label in labels:
-            self.perceptron_dict[label] = Perceptron(label, get_sample_weights_with_features(theta_bias=-0.5), theta_bias=-0.5)
+            sample_weights = get_sample_weights_with_features(theta_bias=0.9, random_state=self.random_state)
+            self.perceptron_dict[label] = Perceptron(label, sample_weights, theta_bias=0.9)
 
         next_print = int(self.epochs/10)
 
+        random.seed(self.random_state)
+        random_list = [random.randint(0, train_len-1) for i in range(self.epochs)]
+
         # Training Iterations
         for epoch in range(self.epochs):
 
@@ -139,10 +143,8 @@
                 print('Training Multi-Class Perceptron Classifier..... (', epoch, '/', self.epochs, ')')
                 next_print = next_print + int(self.epochs/10)
 
-            # get a random number within the size of training set
-            rand_num = random.randint(0, train_len-1)
-            # pick a random data instance with the generated random number
-            inst = X_train[rand_num]
+            # Pick a number from random list
+            inst = X_train[random_list[epoch]]
 
             perceptron_scores = []  # list for storing perceptron scores for each label
             for label, perceptron in self.perceptron_dict.items():
@@ -163,7 +165,7 @@ class MultiClassPerceptron:
                 # increase weights
                 self.perceptron_dict[inst.true_label].update_weights(inst.features, self.learning_rate, reward=True)
 
-            # It's important to shuffle the data during every epoch
+            # It's important to shuffle the list during every epoch
             random.Random(self.random_state).shuffle(X_train)
 
     def predict(self, X_test: list):
@@ -196,15 +198,22 @@ class MultiClassPerceptron:
         return y_test
 
 
-def get_sample_weights_with_features(theta_bias: float = None):
+def get_sample_weights_with_features(theta_bias: float = None, random_state: int = 42):
     """
     This function creates a dictionary with feature as a key and
     a random floating number (feature weight) as value.
     Weights for each feature is a floating number between -1 and 1
+    :type theta_bias: float
+    :type random_state: int
+
+    :param theta_bias: value of theta bias variable
+    :param random_state: random seed number for reproducing the results
+
     :return: returns a dictionary of random weights for each feature
     """
     weights = {THETA_BIAS_FEATURE: theta_bias}
+    random.seed(random_state)
     for feature in FEATURE_LIST:
-        weights[feature] = round(random.uniform(-1.0, 1.0), 4)
+        weights[feature] = round(random.uniform(-1.0, 1.0), 5)
 
     return weights
diff --git a/testing/model_testing.py b/testing/model_testing.py
index b640f69..59f87b6 100644
--- a/testing/model_testing.py
+++ b/testing/model_testing.py
@@ -17,7 +17,7 @@ X_test_inst = read_csv_file(test_file_path, '\t')
 
 epochs = int(len(X_train_inst)*0.95)
 
-clf = MultiClassPerceptron(epochs=epochs, learning_rate=1, random_state=10)
+clf = MultiClassPerceptron(epochs=epochs, learning_rate=1, random_state=42)
 
 clf.fit(X_train=X_train_inst, labels=list(labels))
 
diff --git a/utils/constants.py b/utils/constants.py
index 8a55fdb..7476fb6 100644
--- a/utils/constants.py
+++ b/utils/constants.py
@@ -22,8 +22,7 @@ REGEX_CONSTANTS = {
     # Regex for matching percentages in the text -> 99% / 99.99% / 10 % / 23.98% / 10-20% / 25%-30%
     'PERCENTAGE': re.compile(r"\d[\d\.\-]+%"),
 
-    # Regex for matching URLs -> http://www.phrap.org/, http://www. , http://carcfordjournals. ,
-    # https://www.ims.uni-stuttgart.de/
     'CONTAINS_URL': re.compile(r"https?://\S+"),
 
     'ENDS_WITH_RIDE': re.compile(r"ride\b"),
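
A note on the seeding behaviour this patch relies on: because get_sample_weights_with_features() calls random.seed(random_state) on every invocation, each label's Perceptron is initialised from the same pseudo-random sequence and therefore starts with identical weights, and random.Random(self.random_state).shuffle(X_train) re-applies the same permutation in every epoch. That may be relevant to the "Random state still not working" subject line. The minimal, self-contained sketch below (not part of the commit) illustrates the re-seeding effect and contrasts it with one dedicated random.Random instance seeded a single time; FEATURE_LIST and THETA_BIAS_FEATURE are hypothetical stand-ins for the constants in utils/constants.py, and seeded_sample_weights is an assumed alternative rather than repository code.

import random

# Hypothetical stand-ins for utils.constants.FEATURE_LIST / THETA_BIAS_FEATURE,
# used only to keep this sketch self-contained.
FEATURE_LIST = ['contains_url', 'percentage', 'ends_with_ride']
THETA_BIAS_FEATURE = 'theta_bias'


def get_sample_weights_with_features(theta_bias=None, random_state=42):
    """Mirror of the patched helper: it re-seeds the module-level RNG on every call."""
    weights = {THETA_BIAS_FEATURE: theta_bias}
    random.seed(random_state)
    for feature in FEATURE_LIST:
        weights[feature] = round(random.uniform(-1.0, 1.0), 5)
    return weights


# Re-seeding inside the helper means every label starts from identical weights.
w_label_a = get_sample_weights_with_features(theta_bias=0.9, random_state=42)
w_label_b = get_sample_weights_with_features(theta_bias=0.9, random_state=42)
print(w_label_a == w_label_b)   # True: both perceptrons get the same initial weights


def seeded_sample_weights(rng, theta_bias=None):
    """Assumed alternative (not repository code): draw from one dedicated
    random.Random instance that is seeded only once, so successive calls differ
    while the whole sequence is still reproducible from the seed."""
    weights = {THETA_BIAS_FEATURE: theta_bias}
    for feature in FEATURE_LIST:
        weights[feature] = round(rng.uniform(-1.0, 1.0), 5)
    return weights


rng = random.Random(42)         # seed once, e.g. at the start of fit()
w_label_c = seeded_sample_weights(rng, theta_bias=0.9)
w_label_d = seeded_sample_weights(rng, theta_bias=0.9)
print(w_label_c == w_label_d)   # almost surely False: weights differ per label,
                                # yet rerunning the script reproduces them exactly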