|
|
|
@ -14,41 +14,92 @@ import os
|
|
|
|
|
|
|
|
|
|
|
|
@Predictor.register('citation_intent_predictor')
class IntentClassificationPredictor(Predictor):
    """Predictor for the citation intent classifier.

    A thin wrapper around an AllenNLP model, used for making predictions
    from the trained/saved model.
    """

    def predict(self, text: str, intent: str):
        """Predict the intent of a single citation.

        This can be called for each data point of the test dataset. The
        arguments are packed into a JSON dictionary and handed to
        ``predict_json``.

        :param text: citation text from the test data
        :param intent: target intent of the data point
        :return: the value returned by ``predict_json``; the original
            documentation describes it as the output dictionary of
            ``classifier.nn.BiLstmClassifier.forward``
        """
        return self.predict_json({"citation_text": text, "intent": intent})

    @overrides
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """Convert the JSON dictionary built by :meth:`predict` into an Instance.

        The AllenNLP ``Predictor`` calls back into this method with the
        dictionary that was passed to ``predict_json``. The conversion
        itself is delegated to the dataset reader's ``text_to_instance``.

        :param json_dict: dictionary with the keys ``"citation_text"`` and
            ``"intent"``
        :return: AllenNLP Instance with tokens and the target label, as
            produced by the dataset reader
        """
        return self._dataset_reader.text_to_instance(json_dict["citation_text"], json_dict["intent"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_predictions(model: Model, dataset_reader: DatasetReader, dataset_file_path: str) -> Tuple[list, list]:
    """Predict the intent of every citation in a JSON Lines dataset file.

    Steps:

    - Create a predictor object from the pre-trained model and the
      dataset reader.
    - Read the data from ``dataset_file_path`` and, for each data point,
      use the predictor to predict the intent.

    :param model: a trained/saved AllenNLP Model
    :param dataset_reader: dataset reader object (for tokenizing text and
        creating Instances)
    :param dataset_file_path: path of the dataset file to make
        predictions for
    :return: a tuple ``(prediction_list, true_list)``; the two lists are
        index-aligned, one entry per data point read from the file
    """
    # Create predictor class object
    predictor = IntentClassificationPredictor(model, dataset_reader)

    prediction_list = []
    true_list = []

    # Read the JSON Lines file and iterate through each data point to predict
    jsonl_reader = DataReaderJsonLines(dataset_file_path)
    for citation in jsonl_reader.read():
        true_list.append(citation.intent)
        output = predictor.predict(citation.text, citation.intent)
        prediction_list.append(output['prediction'])

    # Predictions and gold labels, returned together as a tuple
    return prediction_list, true_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_model_and_predict_test_data(saved_model_dir: str):
    """Load a saved model and run it on the project's test split.

    Loads the archive ``model.tar.gz`` from ``saved_model_dir``, creates a
    ``CitationDataSetReader`` and calls ``make_predictions`` on
    ``data/jsonl/test.jsonl``, resolved relative to the directory two
    levels above this file.

    :param saved_model_dir: directory of the saved AllenNLP model
        (typically from IMS common space)
    :return: a tuple ``(y_pred, y_true)`` of the prediction list and the
        true-label list
    """
    # The project root is the parent of the directory containing this file
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    test_file_path = os.path.join(project_root, 'data', 'jsonl', 'test.jsonl')

    # Load the archived/saved model
    model_archive = load_archive(os.path.join(saved_model_dir, 'model.tar.gz'))

    # Create dataset reader object
    citation_dataset_reader = CitationDataSetReader()

    # Make predictions
    y_pred, y_true = make_predictions(model_archive.model, citation_dataset_reader, test_file_path)

    return y_pred, y_true
|
|
|
|
|