diff --git a/eval/metrics.py b/eval/metrics.py
index f844a3d..072fc5a 100644
--- a/eval/metrics.py
+++ b/eval/metrics.py
@@ -1,4 +1,6 @@
 import utils.constants as const
+from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
 
 
 def f1_score(y_true, y_pred, labels, average):
@@ -163,6 +165,38 @@ def calculate_f1_score(precision, recall):
     return 2 * (precision * recall) / (precision + recall)
 
 
+def get_confusion_matrix(y_true, y_pred):
+    """
+    Takes the true and predicted labels and returns the confusion matrix
+    :param y_true: True labels
+    :param y_pred: Predicted labels
+    :return: Confusion matrix as a numpy array
+    """
+    return confusion_matrix(y_true, y_pred, labels=const.CLASS_LABELS_LIST)
+
+
+def plot_confusion_matrix(confusion_mat, classifier_name):
+    """
+    Takes a confusion matrix and plots it using matplotlib
+    :param confusion_mat: Confusion matrix
+    :param classifier_name: Classifier name, shown as the plot title
+    """
+    fig, ax = plt.subplots()
+    ax.matshow(confusion_mat, cmap='Greens')
+    # Annotate every cell with its count; ax.text takes (column, row)
+    for x in range(len(const.CLASS_LABELS_LIST)):
+        for y in range(len(const.CLASS_LABELS_LIST)):
+            ax.text(x, y, confusion_mat[y, x], ha='center', va='center')
+    ax.set_xlabel('Predicted')
+    ax.set_ylabel('True/Gold')
+    ax.set_xticks(range(len(const.CLASS_LABELS_LIST)))
+    ax.set_yticks(range(len(const.CLASS_LABELS_LIST)))
+    ax.set_xticklabels(const.CLASS_LABELS_LIST)
+    ax.set_yticklabels(const.CLASS_LABELS_LIST)
+    ax.set_title(classifier_name)
+    plt.show()
+
+
 class Result:
     """
     Model Class for carrying Evaluation Data (F1 Score, Precision, Recall, ....)
diff --git a/run.py b/run.py
index f80a37b..b002a47 100644
--- a/run.py
+++ b/run.py
@@ -1,4 +1,8 @@
 import classifier
 import testing.intent_predictor as pred
+import eval.metrics as metrics
+
 
 y_pred, y_true = pred.load_model_and_predict_test_data("/mount/arbeitsdaten/studenten1/team-lab-nlp/mandavsi_rileyic/saved_models/experiment_4")
+
+metrics.plot_confusion_matrix(metrics.get_confusion_matrix(y_true, y_pred), "BiLSTM Classifier + Attention with ELMo")
\ No newline at end of file
diff --git a/testing/intent_predictor.py b/testing/intent_predictor.py
index 4e00735..2c68414 100644
--- a/testing/intent_predictor.py
+++ b/testing/intent_predictor.py
@@ -24,8 +24,7 @@
         return self._dataset_reader.text_to_instance(json_dict["citation_text"], json_dict["intent"])
 
 
-def make_predictions(model: Model, dataset_reader: DatasetReader, file_path: str) -> Tuple[
-    List[Dict[str, float]], list]:
+def make_predictions(model: Model, dataset_reader: DatasetReader, file_path: str) -> Tuple[List[Dict[str, float]], list]:
     """Make predictions using the given model and dataset reader"""
 
     predictor = IntentClassificationPredictor(model, dataset_reader)
@@ -33,8 +32,6 @@ def make_predictions(model: Model, dataset_reader: DatasetReader, file_path: str
     prediction_list = []
     true_list = []
 
-    vocab = model.vocab
-
     jsonl_reader = DataReaderJsonLines(file_path)
     for citation in jsonl_reader.read():
         true_list.append(citation.intent)
@@ -54,4 +51,4 @@ def load_model_and_predict_test_data(saved_model_dir: str):
 
     y_pred, y_true = make_predictions(model_archive.model, citation_dataset_reader, test_file_path)
 
-    retun y_pred,y_true
+    return y_pred, y_true
diff --git a/utils/constants.py b/utils/constants.py
index 5389eae..091ead2 100644
--- a/utils/constants.py
+++ b/utils/constants.py
@@ -34,3 +34,4 @@ REGEX_CONSTANTS = {
 }
 
 CLASS_LABELS = {"background": 0, "method": 1, "result": 2}
+CLASS_LABELS_LIST = ['background', 'method', 'result']