diff --git a/.gitignore b/.gitignore
index d744ad0..a351d5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -188,3 +188,5 @@ corenlp-dir
 *.props
 prompt-learning/testing.py
 testing.py
+analysis
+analysis.py
\ No newline at end of file
diff --git a/README.md b/README.md
index 5f8527b..2c79f0b 100644
--- a/README.md
+++ b/README.md
@@ -254,22 +254,62 @@ python evaluate.py -o path/to/outputs/file
 
 ## Multi-prompt Learning Experiments
 
-### Prompt Ensemble
-**Training**
+### Prompt Ensemble
 
-Train a separate model for each data split. Edit the [train_prompting.sh](prompt-learning/train_prompting.sh) file and add `--with_prompt_ensemble` for training with multiple prompt functions.
+In the previous section, only a single *value-based* prompt is used at training and inference time. In this task, multiple *value-based* prompts are used at both training and inference time to leverage the generation abilities of different prompts. The aim is to train a single model on multiple prompts, which is much faster and more memory-efficient than training a separate model for each prompt (and running multiple models at inference time).
 
-// TODO :: Add more README for training and generating.
-// WIP :: Prompt ensemble training
+| f | prompt function |
+|:--:|:--|
+| f1 | belief states: *[v]* = *[s]* |
+| f2 | *[v]* is the value of *[s]* |
+| f3 | *[v]* is of slot type *[s]* |
+| f4 | belief states: value = *[v]*, slot = *[s]* |
 
-### Prompt Augmentation
-Prompt Augmentation, also called *demonstration learning*, provides a few additional *answered prompts* that can demonstrate to the PLM, how the actual prompt slot can be answered. Sample selection of answered prompts are manually hand-picked. Experiments are performed on different sets of *answered prompts*.
+**Training**
 
-Edit the [test_prompting.sh](prompt-learning/test_prompting.sh) file and add `--with_answered_prompts` flag for generating slots with answered prompts.
+A separate prompt ensemble model is trained for each data split to evaluate the performance of multi-prompt methods in low-resource scenarios. Edit the [train_prompting.sh](prompt-learning/train_prompting.sh) file to add the `--with_prompt_ensemble` flag for training with multiple prompt functions.
 
-Generate belief states by running the below script:
-```shell
-sh test_prompting.sh -m
+The probability of slot $s_{t}$ over multiple prompt functions is calculated as:
+
+$$
+P\left(s_{t} \mid c_{t}\right)=\sum_{k=1}^{|K|} \alpha_{k} \cdot P\left(s_{t} \mid c_{t}, f_{k}\left(v_{t}\right)\right)
+$$
+where $|K|$ is the number of prompt functions, $f_{k}$ is the $k$-th prompt function, and $\alpha_{k}$ is the weight of prompt $k$. The prompt weight $\alpha_{k}$ is set to `0.25` for all prompt functions, i.e. uniform weighting over the four prompts. The loss $L$ for prompt ensemble training is computed from this ensemble probability.
+
+Run the training script as before after adding the `--with_prompt_ensemble` flag:
+```shell
+sh train_prompting.sh -d
+```
+
+**Testing / Slot generation**
+
+During testing (slot generation), simple majority voting is used to pick the generated slot across the different prompts. When there is no simple majority among the slots generated by the prompts, the slot with the highest probability is picked.
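+
+For illustration, the ensemble probability and the test-time vote can be sketched as below. This is a minimal sketch, not the exact repository code; `gen_words` and `gen_probs` mirror the variable names used in [prompt_decode.py](prompt-learning/prompt_decode.py), while `ensemble_prob` and `pick_slot` are hypothetical helper names:
+
+```python
+from collections import Counter
+
+def ensemble_prob(per_prompt_probs, alpha=0.25):
+    # P(s_t | c_t) = sum_k alpha_k * P(s_t | c_t, f_k(v_t)), with uniform alpha_k = 0.25
+    return sum(alpha * p for p in per_prompt_probs)
+
+def pick_slot(gen_words, gen_probs):
+    # simple majority voting across the prompt functions
+    word, count = Counter(gen_words).most_common(1)[0]
+    if count > len(gen_words) / 2:
+        return word
+    # no simple majority: fall back to the slot with the highest probability
+    return max(zip(gen_probs, gen_words))[1]
+```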
+
+The script for generating belief states (slots) using the prompt ensemble remains the same:
+(there is no need to add any extra flags here; the script checks whether the model was trained on multiple prompts and, if so, uses the ensemble prompts for generation)
+```shell
+sh test_prompting.sh -m
 ```
-// TODO :: Add results
+### Prompt Augmentation
+Prompt Augmentation, also called *demonstration learning*, provides a few additional *answered prompts* that demonstrate to the PLM how the actual prompt slot can be answered. The answered prompts are hand-crafted and manually selected. Experiments are performed on different sets of *answered prompts*.
+
+Edit the [test_prompting.sh](prompt-learning/test_prompting.sh) file and add the `--with_answered_prompts` flag for generating slots with answered prompts.
+
+```shell
+sh test_prompting.sh -m
+```
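+
+Under the hood, the answered prompts are stored in `ANSWERED_PROMPTS` in [prompt_utils.py](prompt-learning/prompt_utils.py) and are simply prepended to the dialogue history and the actual prompt before decoding, roughly as in [prompt_decode.py](prompt-learning/prompt_decode.py):
+
+```python
+# demonstrations first, then the dialogue history, then the actual prompt
+answered_prompts = get_answered_prompts() if args.with_answered_prompts else ''
+prompt = answered_prompts + history + prompt
+```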
+
+### Results from multi-prompt methods
+
+| data-split | JGA | JGA* |
+|--|:--:|:--:|
+| 5-dpd | 30.09 | 69.23 |
+| 10-dpd | 42.84 | 86.99 |
+| 50-dpd | 47.62 | 91.74 |
+| 100-dpd | **48.08** | **92.87** |
+| 125-dpd | 46.96 | 92.08 |
+| 250-dpd | **48.08** | **92.87** |
+
+
+> **Note:** All the generated output files for the results reported above are available in this repository. Check the [outputs/multi-prompt](outputs/multi-prompt) directory for the output JSON files for each data-split.
+
diff --git a/prompt-learning/prompt_decode.py b/prompt-learning/prompt_decode.py
index 22aa1ec..58e4f17 100644
--- a/prompt-learning/prompt_decode.py
+++ b/prompt-learning/prompt_decode.py
@@ -10,6 +10,7 @@ from dataset import PromptDstDataset
 from tqdm.auto import tqdm
 from prompt_utils import get_value_based_prompt
 from prompt_utils import get_ensemble_prompts
+from prompt_utils import get_answered_prompts
 from metrics import PromptDSTEvaluator
 from datetime import datetime
 
@@ -54,9 +55,13 @@ def generate_slot_with_prompt_ensemble(args, history, value, tokenizer, model, d
 
     gen_probs, gen_words = [], []
 
+    answered_prompts = ''
+    if args.with_answered_prompts:
+        answered_prompts = get_answered_prompts()
+
     for prompt in prompts:
         # combine history and prompt
-        prompt = history + prompt
+        prompt = answered_prompts + history + prompt
 
         # encode the history & prompt
         encoded_prompt = tokenizer(prompt, return_tensors="pt")
diff --git a/prompt-learning/prompt_utils.py b/prompt-learning/prompt_utils.py
index 568d3ee..1dff28b 100644
--- a/prompt-learning/prompt_utils.py
+++ b/prompt-learning/prompt_utils.py
@@ -35,6 +35,27 @@ PROMPT_TEMPLATES = {
     }
 }
 
+ANSWERED_PROMPTS = """
+history:
+user : i need to be picked up from city centre after 16:30.\n
+belief states:
+value = city centre, slot = departure
+value = 16:30, slot = leave
+
+history:
+user : hi, i need to leave from frankfurt airport. can you find me a train after 20:15 ?\n
+belief states:
+value = frankfurt airport, slot = departure
+value = 20:15, slot = leave
+
+history:
+user : i need a taxi at 8:30 to go from city centre to university.\n
+belief states:
+value = city centre, slot = departure
+value = university, slot = destination
+value = 8:30, slot = leave
+"""
+
 
 def get_prompt_for_training(typ, slot_value):
     template = PROMPT_TEMPLATES[typ]['training']
@@ -62,3 +83,7 @@ def get_ensemble_prompts(value):
         template = Template(template_str)
         prompt_list.append(template.substitute(value=value))
     return prompt_list
+
+
+def get_answered_prompts():
+    return ANSWERED_PROMPTS
diff --git a/prompt-learning/test_prompting.sh b/prompt-learning/test_prompting.sh
index 8ff2d8d..6e1543d 100644
--- a/prompt-learning/test_prompting.sh
+++ b/prompt-learning/test_prompting.sh
@@ -51,4 +51,5 @@ mkdir -p "${OUTPUTS_DIR}"
 python prompt_decode.py \
 --output_dir="${OUTPUTS_DIR}" \
 --tuned_model_path="${FINE_TUNED_MODEL_PATH}" \
---test_data_file="${TEST_DATA_FILE}"
\ No newline at end of file
+--test_data_file="${TEST_DATA_FILE}" \
+--with_prompt_ensemble
\ No newline at end of file
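
With this change applied, the decode invocation in `test_prompting.sh` ends up as sketched below. The `--with_answered_prompts` line is optional; it is the flag the Prompt Augmentation section of the README says to add, shown here only for illustration:

```shell
python prompt_decode.py \
--output_dir="${OUTPUTS_DIR}" \
--tuned_model_path="${FINE_TUNED_MODEL_PATH}" \
--test_data_file="${TEST_DATA_FILE}" \
--with_prompt_ensemble \
--with_answered_prompts
```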