Code documentation/comments for predictor

isaac
Pavan Mandava 5 years ago
parent 038e612e4f
commit 804533bc23

@ -170,6 +170,9 @@ def calculate_f1_score(precision, recall):
def get_confusion_matrix(y_true, y_pred): def get_confusion_matrix(y_true, y_pred):
""" """
takes predicted labels and true labels as parameters and returns Confusion Matrix takes predicted labels and true labels as parameters and returns Confusion Matrix
- uses sklearn metrics functions
:param y_true: True labels :param y_true: True labels
:param y_pred: Predicted labels :param y_pred: Predicted labels
:return: returns Confusion Matrix :return: returns Confusion Matrix
@ -178,6 +181,14 @@ def get_confusion_matrix(y_true, y_pred):
def plot_confusion_matrix(confusion_mat, classifier_name, plot_file_name): def plot_confusion_matrix(confusion_mat, classifier_name, plot_file_name):
"""
Saves the confusion matrix plot with the specified file name
:param confusion_mat: takes Confusion Matrix as an argument
:param classifier_name: Classifier name
:param plot_file_name: file name (with path) to save
"""
plt.figure(figsize=(8, 6)) plt.figure(figsize=(8, 6))
plt.imshow(confusion_mat, interpolation='nearest', cmap=plt.get_cmap('Blues')) plt.imshow(confusion_mat, interpolation='nearest', cmap=plt.get_cmap('Blues'))

@ -3,14 +3,14 @@ import testing.intent_predictor as pred
import eval.metrics as metrics import eval.metrics as metrics
model_path = '/mount/arbeitsdaten/studenten1/team-lab-nlp/mandavsi_rileyic/saved_models/experiment_4' saved_model_dir = '/mount/arbeitsdaten/studenten1/team-lab-nlp/mandavsi_rileyic/saved_models/experiment_4'
y_pred, y_true = pred.load_model_and_predict_test_data(model_path) y_pred, y_true = pred.load_model_and_predict_test_data(saved_model_dir)
confusion_matrix = metrics.get_confusion_matrix(y_true, y_pred) confusion_matrix = metrics.get_confusion_matrix(y_true, y_pred)
print(confusion_matrix) print(confusion_matrix)
plot_file_path = model_path+'/confusion_matrix_plot.png' plot_file_path = saved_model_dir+'/confusion_matrix_plot.png'
metrics.plot_confusion_matrix(confusion_matrix, "BiLSTM Classifier + Attention with ELMo", plot_file_path) metrics.plot_confusion_matrix(confusion_matrix, "BiLSTM Classifier + Attention with ELMo", plot_file_path)
print('Confusion Matrix Plot saved to :: ', plot_file_path) print('Confusion Matrix Plot saved to :: ', plot_file_path)

@ -14,41 +14,92 @@ import os
@Predictor.register('citation_intent_predictor') @Predictor.register('citation_intent_predictor')
class IntentClassificationPredictor(Predictor): class IntentClassificationPredictor(Predictor):
""""Predictor for Citation Intent Classifier""" """
~~~Predictor for Citation Intent Classifier~~~
- This is just a wrapper class around AllenNLP Model
used for making predictions from the trained/saved model
"""
def predict(self, text: str, intent: str): def predict(self, text: str, intent: str):
"""
This function can be called for each data point from the test dataset,
takes citation text and the target intent as parameters and
returns output dictionary from :func:`~classifier.nn.BiLstmClassifier.forward` method
:param text: Citation text from test data
:param intent: target intent of the data point
:return: returns output dictionary from Model's forward method
"""
return self.predict_json({"citation_text": text, "intent": intent}) return self.predict_json({"citation_text": text, "intent": intent})
@overrides @overrides
def _json_to_instance(self, json_dict: JsonDict) -> Instance: def _json_to_instance(self, json_dict: JsonDict) -> Instance:
"""
AllenNLP Predictor calls back into this method,
passing a JsonDict with the data that we passed to the predict_json function earlier.
This callback should return the AllenNLP Instance with tokens and target label.
:param json_dict: json dictionary data with text and intent label
:return: returns AllenNLP Instance with tokens(ELMo) and target label
"""
return self._dataset_reader.text_to_instance(json_dict["citation_text"], json_dict["intent"]) return self._dataset_reader.text_to_instance(json_dict["citation_text"], json_dict["intent"])
def make_predictions(model: Model, dataset_reader: DatasetReader, file_path: str) -> Tuple[List[Dict[str, float]], list]: def make_predictions(model: Model, dataset_reader: DatasetReader, dataset_file_path: str) -> Tuple[list, list]:
"""Make predictions using the given model and dataset reader""" """
This function takes the pre-trained(saved) Model and DatasetReader(and dataset file path) as arguments
and returns a Tuple of prediction list and gold/true list.
- Creates a predictor object with the pre-trained model and dataset reader.
- Reads the data from the passed dataset file path and, for each data point, uses the predictor to predict the intent
:param model: a trained/saved AllenNLP Model
:param dataset_reader: Dataset reader object (for tokenizing text and creating Instances)
:param dataset_file_path: a dataset file path to make predictions
:return: returns a Tuple of prediction list and true labels list
"""
# Create predictor class object
predictor = IntentClassificationPredictor(model, dataset_reader) predictor = IntentClassificationPredictor(model, dataset_reader)
prediction_list = [] prediction_list = []
true_list = [] true_list = []
jsonl_reader = DataReaderJsonLines(file_path) # read JSON Lines file and Iterate through each datapoint to predict
jsonl_reader = DataReaderJsonLines(dataset_file_path)
for citation in jsonl_reader.read(): for citation in jsonl_reader.read():
true_list.append(citation.intent) true_list.append(citation.intent)
output = predictor.predict(citation.text, citation.intent) output = predictor.predict(citation.text, citation.intent)
prediction_list.append(output['prediction']) prediction_list.append(output['prediction'])
# returns prediction list and gold labels list - Tuple
return prediction_list, true_list return prediction_list, true_list
def load_model_and_predict_test_data(saved_model_dir: str): def load_model_and_predict_test_data(saved_model_dir: str):
"""
This function loads the saved model from the specified directory and calls make_predictions function.
:param saved_model_dir: path of the saved AllenNLP model (typically from IMS common space)
:return: returns a Tuple of prediction list and true labels list
"""
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
dev_file_path = project_root + '/data/jsonl/dev.jsonl' dev_file_path = project_root + '/data/jsonl/dev.jsonl'
test_file_path = project_root + '/data/jsonl/test.jsonl' test_file_path = project_root + '/data/jsonl/test.jsonl'
# load the archived/saved model
model_archive = load_archive(os.path.join(saved_model_dir, 'model.tar.gz')) model_archive = load_archive(os.path.join(saved_model_dir, 'model.tar.gz'))
# create dataset reader object
citation_dataset_reader = CitationDataSetReader() citation_dataset_reader = CitationDataSetReader()
# make predictions
y_pred, y_true = make_predictions(model_archive.model, citation_dataset_reader, test_file_path) y_pred, y_true = make_predictions(model_archive.model, citation_dataset_reader, test_file_path)
return y_pred, y_true return y_pred, y_true

Loading…
Cancel
Save