Random state still not working

isaac
Pavan Mandava 6 years ago
parent 3c0e4a411d
commit 6575ba0952

@ -62,9 +62,9 @@ class Perceptron:
for feature in features: for feature in features:
feature_weight = self.weights[feature] feature_weight = self.weights[feature]
if penalize: if penalize:
self.weights[feature] = feature_weight - (learning_rate * 1) self.weights[feature] = round(feature_weight - (learning_rate * 1), 5)
if reward: if reward:
self.weights[feature] = feature_weight + (learning_rate * 1) self.weights[feature] = round(feature_weight + (learning_rate * 1), 5)
class MultiClassPerceptron: class MultiClassPerceptron:
@ -85,7 +85,7 @@ class MultiClassPerceptron:
""" """
def __init__(self, epochs: int = 5000, learning_rate: float = 1, random_state: int = 4): def __init__(self, epochs: int = 5000, learning_rate: float = 1, random_state: int = 42):
""" """
:type epochs: int :type epochs: int
:type learning_rate: float :type learning_rate: float
@ -94,7 +94,7 @@ class MultiClassPerceptron:
:param epochs: number of training iterations :param epochs: number of training iterations
:param learning_rate: learning rate for updating weights, Default is 1 :param learning_rate: learning rate for updating weights, Default is 1
:param random_state: random state for shuffling the data, useful for reproducing the results. :param random_state: random state for shuffling the data, useful for reproducing the results.
Default is 4. Default is 42.
""" """
self.random_state = random_state self.random_state = random_state
self.perceptron_dict = OrderedDict() # contains Key : label and value : Perceptron Object for label self.perceptron_dict = OrderedDict() # contains Key : label and value : Perceptron Object for label
@ -128,10 +128,14 @@ class MultiClassPerceptron:
# Dictionary for storing label->Perceptron() objects, Create a new Perceptron object for each label # Dictionary for storing label->Perceptron() objects, Create a new Perceptron object for each label
for label in labels: for label in labels:
self.perceptron_dict[label] = Perceptron(label, get_sample_weights_with_features(theta_bias=-0.5), theta_bias=-0.5) sample_weights = get_sample_weights_with_features(theta_bias=0.9, random_state=self.random_state)
self.perceptron_dict[label] = Perceptron(label, sample_weights, theta_bias=0.9)
next_print = int(self.epochs/10) next_print = int(self.epochs/10)
random.seed(self.random_state)
random_list = [random.randint(0, train_len-1) for i in range(self.epochs)]
# Training Iterations # Training Iterations
for epoch in range(self.epochs): for epoch in range(self.epochs):
@ -139,10 +143,8 @@ class MultiClassPerceptron:
print('Training Multi-Class Perceptron Classifier..... (', epoch, '/', self.epochs, ')') print('Training Multi-Class Perceptron Classifier..... (', epoch, '/', self.epochs, ')')
next_print = next_print + int(self.epochs/10) next_print = next_print + int(self.epochs/10)
# get a random number within the size of training set # Pick a number from random list
rand_num = random.randint(0, train_len-1) inst = X_train[random_list[epoch]]
# pick a random data instance with the generated random number
inst = X_train[rand_num]
perceptron_scores = [] # list for storing perceptron scores for each label perceptron_scores = [] # list for storing perceptron scores for each label
for label, perceptron in self.perceptron_dict.items(): for label, perceptron in self.perceptron_dict.items():
@ -163,7 +165,7 @@ class MultiClassPerceptron:
# increase weights # increase weights
self.perceptron_dict[inst.true_label].update_weights(inst.features, self.learning_rate, reward=True) self.perceptron_dict[inst.true_label].update_weights(inst.features, self.learning_rate, reward=True)
# It's important to shuffle the data during every epoch # It's important to shuffle the list during every epoch
random.Random(self.random_state).shuffle(X_train) random.Random(self.random_state).shuffle(X_train)
def predict(self, X_test: list): def predict(self, X_test: list):
@ -196,15 +198,22 @@ class MultiClassPerceptron:
return y_test return y_test
def get_sample_weights_with_features(theta_bias: float = None): def get_sample_weights_with_features(theta_bias: float = None, random_state: int = 42):
""" """
This function creates a dictionary with feature as a key and a random floating number (feature weight) as value. This function creates a dictionary with feature as a key and a random floating number (feature weight) as value.
The weight for each feature is a floating-point number between -1 and 1 The weight for each feature is a floating-point number between -1 and 1
:type theta_bias: float
:type random_state: int
:param theta_bias: value of theta bias variable
:param random_state: random seed number for reproducing the results
:return: returns a dictionary of random weights for each feature :return: returns a dictionary of random weights for each feature
""" """
weights = {THETA_BIAS_FEATURE: theta_bias} weights = {THETA_BIAS_FEATURE: theta_bias}
random.seed(random_state)
for feature in FEATURE_LIST: for feature in FEATURE_LIST:
weights[feature] = round(random.uniform(-1.0, 1.0), 4) weights[feature] = round(random.uniform(-1.0, 1.0), 5)
return weights return weights

@ -17,7 +17,7 @@ X_test_inst = read_csv_file(test_file_path, '\t')
epochs = int(len(X_train_inst)*0.95) epochs = int(len(X_train_inst)*0.95)
clf = MultiClassPerceptron(epochs=epochs, learning_rate=1, random_state=10) clf = MultiClassPerceptron(epochs=epochs, learning_rate=1, random_state=42)
clf.fit(X_train=X_train_inst, labels=list(labels)) clf.fit(X_train=X_train_inst, labels=list(labels))

@ -22,8 +22,7 @@ REGEX_CONSTANTS = {
# Regex for matching percentages in the text -> 99% / 99.99% / 10 % / 23.98% / 10-20% / 25%-30% # Regex for matching percentages in the text -> 99% / 99.99% / 10 % / 23.98% / 10-20% / 25%-30%
'PERCENTAGE': re.compile(r"\d[\d\.\-]+%"), 'PERCENTAGE': re.compile(r"\d[\d\.\-]+%"),
# Regex for matching URLs -> http://www.phrap.org/, http://www. , http://carcfordjournals. , # Regex for matching URLs -> http://www.phrap.org/, http://www. , http://carcfordjournals.
# https://www.ims.uni-stuttgart.de/
'CONTAINS_URL': re.compile(r"https?://\S+"), 'CONTAINS_URL': re.compile(r"https?://\S+"),
'ENDS_WITH_RIDE': re.compile(r"ride\b"), 'ENDS_WITH_RIDE': re.compile(r"ride\b"),

Loading…
Cancel
Save