Added Presentation slides to the repo

main
Pavan Mandava 3 years ago
parent 2e123c5106
commit 5b1e2e4938

@@ -7,10 +7,14 @@ include_toc: true
Repository for my master's thesis at the University of Stuttgart (IMS).
Refer to this thesis [proposal](proposal/proposal_submission_1st.pdf) document for a detailed explanation of the thesis experiments.
Proposal [PDF](proposal/proposal-final.pdf) - detailed explanation of the thesis experiments
Presentation [PDF](presentation/presentation.pdf) - slides from the thesis presentation
Thesis [PDF](thesis.pdf) - details about methods, experiments, results, discussion
## Dataset
MultiWOZ 2.1 [dataset](https://github.com/budzianowski/multiwoz/blob/master/data/MultiWOZ_2.1.zip) is used for training and evaluation of the baseline and prompt-based methods. MultiWOZ is a fully-labeled dataset of human-human written conversations spanning multiple domains and topics. Only single-domain dialogues are used in this setup for training and testing. Each dialogue contains multiple turns and may also contain a subdomain *booking*. Five domains (*Hotel, Train, Restaurant, Attraction, Taxi*) are used in the experiments; the other two domains are excluded because they appear only in the training set. Under few-shot settings, only a portion of the training data is used to measure performance on the DST task in a low-resource scenario. Dialogues are randomly picked for each domain. The table below contains statistics of the dataset and the data splits for the few-shot experiments.
MultiWOZ 2.1 [dataset](https://github.com/budzianowski/multiwoz/blob/master/data/MultiWOZ_2.1.zip) is used for training and evaluation of the baseline and prompt-based methods. Only single-domain dialogues are used in this setup for training and testing. Each dialogue contains multiple turns and may also contain a subdomain *booking*. Five domains (*Hotel, Train, Restaurant, Attraction, Taxi*) are used in the experiments; the other two domains are excluded because they appear only in the training set. Under few-shot settings, only a portion of the training data is used to measure performance on the DST task in a low-resource scenario.
| Data Split | # Dialogues | # Total Turns |
|--|:--:|:--:|

@@ -0,0 +1,375 @@
@article{yang2022prompt,
author = {Yuting Yang and
Wenqiang Lei and
Juan Cao and
Jintao Li and
Tat{-}Seng Chua},
title = {Prompt Learning for Few-Shot Dialogue State Tracking},
journal = {CoRR},
volume = {abs/2201.05780},
year = {2022},
url = {https://arxiv.org/abs/2201.05780},
eprinttype = {arXiv},
eprint = {2201.05780},
timestamp = {Mon, 18 Jul 2022 13:17:40 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2201-05780.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{liu2021ppp,
author = {Pengfei Liu and
Weizhe Yuan and
Jinlan Fu and
Zhengbao Jiang and
Hiroaki Hayashi and
Graham Neubig},
title = {Pre-train, Prompt, and Predict: {A} Systematic Survey of Prompting
Methods in Natural Language Processing},
journal = {CoRR},
volume = {abs/2107.13586},
year = {2021},
url = {https://arxiv.org/abs/2107.13586},
eprinttype = {arXiv},
eprint = {2107.13586},
timestamp = {Tue, 03 Aug 2021 14:53:34 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2107-13586.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{brown2020gpt3,
author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
booktitle = {Advances in Neural Information Processing Systems},
editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
pages = {1877--1901},
publisher = {Curran Associates, Inc.},
title = {Language Models are Few-Shot Learners},
url = {https://proceedings.neurips.cc/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
volume = {33},
year = {2020}
}
@article{madotto2021fsb,
author = {Andrea Madotto and
Zhaojiang Lin and
Genta Indra Winata and
Pascale Fung},
title = {Few-Shot Bot: Prompt-Based Learning for Dialogue Systems},
journal = {CoRR},
volume = {abs/2110.08118},
year = {2021},
url = {https://arxiv.org/abs/2110.08118},
eprinttype = {arXiv},
eprint = {2110.08118},
timestamp = {Fri, 22 Oct 2021 13:33:09 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2110-08118.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{radford2018gpt,
title={Improving language understanding by generative pre-training},
author={Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya and others},
year={2018},
publisher={OpenAI}
}
@article{radford2019gpt2,
title={Language models are unsupervised multitask learners},
author={Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya and others},
journal={OpenAI blog},
volume={1},
number={8},
pages={9},
year={2019}
}
@inproceedings{devlin2019bert,
title = "{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding",
author = "Devlin, Jacob and
Chang, Ming-Wei and
Lee, Kenton and
Toutanova, Kristina",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-1423",
doi = "10.18653/v1/N19-1423",
pages = "4171--4186",
abstract = "We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).",
}
@article{peng2021soloist,
title = "{SOLOIST:} Building Task Bots at Scale with Transfer Learning and Machine Teaching",
author = "Peng, Baolin and
Li, Chunyuan and
Li, Jinchao and
Shayandeh, Shahin and
Liden, Lars and
Gao, Jianfeng",
journal = "Transactions of the Association for Computational Linguistics",
volume = "9",
year = "2021",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2021.tacl-1.49",
doi = "10.1162/tacl_a_00399",
pages = "807--824",
abstract = "Abstract We present a new method, Soloist,1 that uses transfer learning and machine teaching to build task bots at scale. We parameterize classical modular task-oriented dialog systems using a Transformer-based auto-regressive language model, which subsumes different dialog modules into a single neural model. We pre-train, on heterogeneous dialog corpora, a task-grounded response generation model, which can generate dialog responses grounded in user goals and real-world knowledge for task completion. The pre-trained model can be efficiently adapted to accomplish new tasks with a handful of task-specific dialogs via machine teaching, where training samples are generated by human teachers interacting with the system. Experiments show that (i)Soloist creates new state-of-the-art on well-studied task-oriented dialog benchmarks, including CamRest676 and MultiWOZ; (ii) in the few-shot fine-tuning settings, Soloist significantly outperforms existing methods; and (iii) the use of machine teaching substantially reduces the labeling cost of fine-tuning. The pre-trained models and codes are available at https://aka.ms/soloist.",
}
@article{lee2021sdp,
author = {Chia{-}Hsuan Lee and
Hao Cheng and
Mari Ostendorf},
title = {Dialogue State Tracking with a Language Model using Schema-Driven
Prompting},
journal = {CoRR},
volume = {abs/2109.07506},
year = {2021},
url = {https://arxiv.org/abs/2109.07506},
eprinttype = {arXiv},
eprint = {2109.07506},
timestamp = {Wed, 03 Nov 2021 08:48:34 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2109-07506.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{eric2019multiwoz,
author = {Mihail Eric and
Rahul Goel and
Shachi Paul and
Abhishek Sethi and
Sanchit Agarwal and
Shuyang Gao and
Dilek Hakkani{-}T{\"{u}}r},
title = {MultiWOZ 2.1: Multi-Domain Dialogue State Corrections and State Tracking Baselines},
journal = {CoRR},
volume = {abs/1907.01669},
year = {2019},
url = {http://arxiv.org/abs/1907.01669},
eprinttype = {arXiv},
eprint = {1907.01669},
timestamp = {Fri, 09 Aug 2019 10:00:01 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1907-01669.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{budzianowski2018multiwoz,
title = "{M}ulti{WOZ} - A Large-Scale Multi-Domain {W}izard-of-{O}z Dataset for Task-Oriented Dialogue Modelling",
author = "Budzianowski, Pawe{\l} and
Wen, Tsung-Hsien and
Tseng, Bo-Hsiang and
Casanueva, I{\~n}igo and
Ultes, Stefan and
Ramadan, Osman and
Ga{\v{s}}i{\'c}, Milica",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1547",
doi = "10.18653/v1/D18-1547",
pages = "5016--5026",
abstract = "Even though machine learning has become the major scene in dialogue research community, the real breakthrough has been blocked by the scale of data available.To address this fundamental obstacle, we introduce the Multi-Domain Wizard-of-Oz dataset (MultiWOZ), a fully-labeled collection of human-human written conversations spanning over multiple domains and topics.At a size of 10k dialogues, it is at least one order of magnitude larger than all previous annotated task-oriented corpora.The contribution of this work apart from the open-sourced dataset is two-fold:firstly, a detailed description of the data collection procedure along with a summary of data structure and analysis is provided. The proposed data-collection pipeline is entirely based on crowd-sourcing without the need of hiring professional annotators;secondly, a set of benchmark results of belief tracking, dialogue act and response generation is reported, which shows the usability of the data and sets a baseline for future studies.",
}
@inproceedings{min2020dsi,
title = {Dialogue State Induction Using Neural Latent Variable Models},
author = {Min, Qingkai and Qin, Libo and Teng, Zhiyang and Liu, Xiao and Zhang, Yue},
booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on
Artificial Intelligence, {IJCAI-20}},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
editor = {Christian Bessiere},
pages = {3845--3852},
year = {2020},
month = {7},
note = {Main track},
doi = {10.24963/ijcai.2020/532},
url = {https://doi.org/10.24963/ijcai.2020/532},
}
@inproceedings{gao2021lmbff,
title = "Making Pre-trained Language Models Better Few-shot Learners",
author = "Gao, Tianyu and
Fisch, Adam and
Chen, Danqi",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-long.295",
doi = "10.18653/v1/2021.acl-long.295",
pages = "3816--3830",
abstract = "The recent GPT-3 model (Brown et al., 2020) achieves remarkable few-shot performance solely by leveraging a natural-language prompt and a few task demonstrations as input context. Inspired by their findings, we study few-shot learning in a more practical scenario, where we use smaller language models for which fine-tuning is computationally efficient. We present LM-BFF{---}better few-shot fine-tuning of language models{---}a suite of simple and complementary techniques for fine-tuning language models on a small number of annotated examples. Our approach includes (1) prompt-based fine-tuning together with a novel pipeline for automating prompt generation; and (2) a refined strategy for dynamically and selectively incorporating demonstrations into each context. Finally, we present a systematic evaluation for analyzing few-shot performance on a range of NLP tasks, including classification and regression. Our experiments demonstrate that our methods combine to dramatically outperform standard fine-tuning procedures in this low resource setting, achieving up to 30{\%} absolute improvement, and 11{\%} on average across all tasks. Our approach makes minimal assumptions on task resources and domain expertise, and hence constitutes a strong task-agnostic method for few-shot learning.",
}
@inproceedings{cui2021template,
title = "Template-Based Named Entity Recognition Using {BART}",
author = "Cui, Leyang and
Wu, Yu and
Liu, Jian and
Yang, Sen and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-acl.161",
doi = "10.18653/v1/2021.findings-acl.161",
pages = "1835--1845",
}
@inproceedings{schick2021genpet,
title = "Few-Shot Text Generation with Natural Language Instructions",
author = {Schick, Timo and Sch{\"u}tze, Hinrich},
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.32",
doi = "10.18653/v1/2021.emnlp-main.32",
pages = "390--402",
abstract = "Providing pretrained language models with simple task descriptions in natural language enables them to solve some tasks in a fully unsupervised fashion. Moreover, when combined with regular learning from examples, this idea yields impressive few-shot results for a wide range of text classification tasks. It is also a promising direction to improve data efficiency in generative settings, but there are several challenges to using a combination of task descriptions and example-based learning for text generation. In particular, it is crucial to find task descriptions that are easy to understand for the pretrained model and to ensure that it actually makes good use of them; furthermore, effective measures against overfitting have to be implemented. In this paper, we show how these challenges can be tackled: We introduce GenPET, a method for text generation that is based on pattern-exploiting training, a recent approach for combining textual instructions with supervised learning that only works for classification tasks. On several summarization and headline generation datasets, GenPET gives consistent improvements over strong baselines in few-shot settings.",
}
@inproceedings{li2021coco,
title={CoCo: Controllable Counterfactuals for Evaluating Dialogue State Trackers},
author={Shiyang Li and Semih Yavuz and Kazuma Hashimoto and Jia Li and Tong Niu and Nazneen Rajani and Xifeng Yan and Yingbo Zhou and Caiming Xiong},
booktitle={International Conference on Learning Representations},
year={2021},
url={https://openreview.net/forum?id=eom0IUrF__F}
}
@inproceedings{vaswani2017attention,
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia},
booktitle = {Advances in Neural Information Processing Systems},
editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
pages = {},
publisher = {Curran Associates, Inc.},
title = {Attention is All you Need},
url = {https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
volume = {30},
year = {2017}
}
@inproceedings{holtzman2020topp,
title={The Curious Case of Neural Text Degeneration},
author={Ari Holtzman and Jan Buys and Li Du and Maxwell Forbes and Yejin Choi},
booktitle={International Conference on Learning Representations},
year={2020},
url={https://openreview.net/forum?id=rygGQyrFvH}
}
@inproceedings{fan2018topk,
title = "Hierarchical Neural Story Generation",
author = "Fan, Angela and
Lewis, Mike and
Dauphin, Yann",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-1082",
doi = "10.18653/v1/P18-1082",
pages = "889--898",
abstract = "We explore story generation: creative systems that can build coherent and fluent passages of text about a topic. We collect a large dataset of 300K human-written stories paired with writing prompts from an online forum. Our dataset enables hierarchical story generation, where the model first generates a premise, and then transforms it into a passage of text. We gain further improvements with a novel form of model fusion that improves the relevance of the story to the prompt, and adding a new gated multi-scale self-attention mechanism to model long-range context. Experiments show large improvements over strong baselines on both automated and human evaluations. Human judges prefer stories generated by our approach to those from a strong non-hierarchical model by a factor of two to one.",
}
@inproceedings{qi2020stanza,
title = "{S}tanza: A Python Natural Language Processing Toolkit for Many Human Languages",
author = "Qi, Peng and
Zhang, Yuhao and
Zhang, Yuhui and
Bolton, Jason and
Manning, Christopher D.",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-demos.14",
doi = "10.18653/v1/2020.acl-demos.14",
pages = "101--108",
abstract = "We introduce Stanza, an open-source Python natural language processing toolkit supporting 66 human languages. Compared to existing widely used toolkits, Stanza features a language-agnostic fully neural pipeline for text analysis, including tokenization, multi-word token expansion, lemmatization, part-of-speech and morphological feature tagging, dependency parsing, and named entity recognition. We have trained Stanza on a total of 112 datasets, including the Universal Dependencies treebanks and other multilingual corpora, and show that the same neural architecture generalizes well and achieves competitive performance on all languages tested. Additionally, Stanza includes a native Python interface to the widely used Java Stanford CoreNLP software, which further extends its functionality to cover other tasks such as coreference resolution and relation extraction. Source code, documentation, and pretrained models for 66 languages are available at https://stanfordnlp.github.io/stanza/.",
}
@inproceedings{wolf2020transformers,
title = "Transformers: State-of-the-Art Natural Language Processing",
author = "Wolf, Thomas and
Debut, Lysandre and
Sanh, Victor and
Chaumond, Julien and
Delangue, Clement and
Moi, Anthony and
Cistac, Pierric and
Rault, Tim and
Louf, Remi and
Funtowicz, Morgan and
Davison, Joe and
Shleifer, Sam and
von Platen, Patrick and
Ma, Clara and
Jernite, Yacine and
Plu, Julien and
Xu, Canwen and
Le Scao, Teven and
Gugger, Sylvain and
Drame, Mariama and
Lhoest, Quentin and
Rush, Alexander",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = oct,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-demos.6",
doi = "10.18653/v1/2020.emnlp-demos.6",
pages = "38--45",
abstract = "Recent progress in natural language processing has been driven by advances in both model architecture and model pretraining. Transformer architectures have facilitated building higher-capacity models and pretraining has made it possible to effectively utilize this capacity for a wide variety of tasks. Transformers is an open-source library with the goal of opening up these advances to the wider machine learning community. The library consists of carefully engineered state-of-the art Transformer architectures under a unified API. Backing this library is a curated collection of pretrained models made by and available for the community. Transformers is designed to be extensible by researchers, simple for practitioners, and fast and robust in industrial deployments. The library is available at https://github.com/huggingface/transformers.",
}
@inproceedings{kingma2015adam,
author = {Diederik P. Kingma and
Jimmy Ba},
editor = {Yoshua Bengio and
Yann LeCun},
title = {Adam: {A} Method for Stochastic Optimization},
booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
year = {2015},
url = {http://arxiv.org/abs/1412.6980}
}
@article{ni2021dlds,
author = {Jinjie Ni and
Tom Young and
Vlad Pandelea and
Fuzhao Xue and
Vinay Adiga and
Erik Cambria},
title = {Recent Advances in Deep Learning Based Dialogue Systems: {A} Systematic
Survey},
journal = {CoRR},
volume = {abs/2105.04387},
year = {2021},
url = {https://arxiv.org/abs/2105.04387},
eprinttype = {arXiv},
eprint = {2105.04387},
timestamp = {Mon, 31 May 2021 08:19:46 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2105-04387.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{wu2020tod-bert,
title = "{TOD}-{BERT}: Pre-trained Natural Language Understanding for Task-Oriented Dialogue",
author = "Wu, Chien-Sheng and
Hoi, Steven C.H. and
Socher, Richard and
Xiong, Caiming",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.66",
doi = "10.18653/v1/2020.emnlp-main.66",
pages = "917--929",
abstract = "The underlying difference of linguistic patterns between general text and task-oriented dialogue makes existing pre-trained language models less useful in practice. In this work, we unify nine human-human and multi-turn task-oriented dialogue datasets for language modeling. To better model dialogue behavior during pre-training, we incorporate user and system tokens into the masked language modeling. We propose a contrastive objective function to simulate the response selection task. Our pre-trained task-oriented dialogue BERT (TOD-BERT) outperforms strong baselines like BERT on four downstream task-oriented dialogue applications, including intention recognition, dialogue state tracking, dialogue act prediction, and response selection. We also show that TOD-BERT has a stronger few-shot ability that can mitigate the data scarcity problem for task-oriented dialogue.",
}
@inproceedings{schick2021pet,
title = "Exploiting Cloze-Questions for Few-Shot Text Classification and Natural Language Inference",
author = {Schick, Timo and
Sch{\"u}tze, Hinrich},
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.20",
doi = "10.18653/v1/2021.eacl-main.20",
pages = "255--269",
abstract = "Some NLP tasks can be solved in a fully unsupervised fashion by providing a pretrained language model with {``}task descriptions{''} in natural language (e.g., Radford et al., 2019). While this approach underperforms its supervised counterpart, we show in this work that the two ideas can be combined: We introduce Pattern-Exploiting Training (PET), a semi-supervised training procedure that reformulates input examples as cloze-style phrases to help language models understand a given task. These phrases are then used to assign soft labels to a large set of unlabeled examples. Finally, standard supervised training is performed on the resulting training set. For several tasks and languages, PET outperforms supervised training and strong semi-supervised approaches in low-resource settings by a large margin.",
}

Binary files not shown: 11 new images (59-264 KiB each) and one other binary file.

@@ -0,0 +1,554 @@
\documentclass[10pt]{beamer}
\usepackage{beamerthemesplit}
\usepackage[utf8]{inputenc}
\usepackage[font=small,figurename=Fig]{caption}
\usepackage{graphicx}
\graphicspath{ {images/} }
\usepackage[style=authoryear, backend=biber]{biblatex}
\addbibresource{bibliography.bib}
\usetheme{Frankfurt}
\usecolortheme{default}
\title[Prompt-based methods for DST]{Prompt-based methods for Dialog State Tracking}
\subtitle{Thesis Presentation}
\author[Pavan Mandava]{Mandava, Sai Pavan}
\institute{Institut für Maschinelle Sprachverarbeitung\\
Universität Stuttgart}
\date[Thesis Presentation]{10.02.2023}
\AtBeginSection[]
{
\begin{frame}
\frametitle{Outline}
\tableofcontents[currentsection]
\end{frame}
}
\begin{document}
\frame{\titlepage}
\begin{frame}{Outline}
\tableofcontents
\end{frame}
\section{Introduction \& Motivation}
\begin{frame} \frametitle{Introduction}
\begin{itemize}
\item Task-oriented dialog systems
\begin{itemize}
\item perform a wide range of tasks across multiple domains
\item \textsl{E.g. ticket booking, restaurant booking, etc.}
\end{itemize}
\item Modular-based dialog systems
\begin{itemize}
\item NLU, DST, PL, NLG
\end{itemize}
\end{itemize}
\vspace{8pt}
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{modular_tod.png}
\caption{Modular-based task-oriented dialog system}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Dialog State Tracking (DST)}
\begin{itemize}
\item Essential module for a dialog system to understand the user's requests
\item Tracks user goals in the form of dialog states (or ``belief states'')
\item Dialog states contain a set of \textsl{(slot, value)} pairs
\begin{itemize}
\item Updated at each turn of the conversation
\end{itemize}
\end{itemize}
\begin{block} {DST Example}
\textsf{\textbf{USER:}} Plan a train trip to Berlin this Friday for two people.\\
\textbf{Belief states:} \{(\textsl{destination, Berlin}), (\textsl{day, Friday}), (\textsl{people, 2})\}
\end{block}
\begin{itemize}
\item Ontology of domains
\begin{itemize}
\item Represents the knowledge \& information required for specific tasks
\item Contains a pre-defined set of slots and all possible values for each slot
\item Some neural models solve DST as a classification task over this set
\end{itemize}
\item Problems with depending on ontology
\begin{itemize}
\item Ontology is hard to obtain for new domains
\item Costly and time-consuming
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{PLMs \& Prompt Learning}
\begin{itemize}
\item Pre-trained Language Models (PLMs)
\begin{itemize}
\item Trained on large amounts of textual data
\item Encode linguistic knowledge in their large number of parameters
\item Can be efficiently used to solve NLP tasks
\item BERT\parencite{devlin2019bert}, GPT-2\parencite{radford2019gpt2}, GPT-3\parencite{brown2020gpt3}
\end{itemize}
\item Prompt Learning
\begin{itemize}
\item New way of efficiently using the generation capabilities of PLMs to solve different language tasks
\item The downstream task is converted into a textual prompt; given the prompt as input, the PLM generates the output directly
\item Prompting methods can be effectively used under \textsl{zero-shot} and \textsl{few-shot} settings when there's not enough training data
\item GPT-3 \parencite{brown2020gpt3}, Few-shot Bot \parencite{madotto2021fsb}, \textsc{PET} \parencite{schick2021pet} explored prompt-based methods for several tasks
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Prompt Learning (contd.)}
\begin{figure}
\centering
\includegraphics[width=0.75\textwidth]{prompt_terminology.png}
\caption{Terminology and notations in prompt learning}
\end{figure}
\vspace{-4pt}
\begin{itemize}
\item Prompt Types: \textsl{prefix} \& \textsl{cloze} prompts
\item Prompt selection: manual, discrete, \& continuous prompts
\item Training strategy: Fixed-prompt LM Fine Tuning
\begin{itemize}
\item fixed prompts are applied to the training data and the LM is fine-tuned
\item under low-resource few-shot settings
\end{itemize}
\end{itemize}
\end{frame}
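%% Illustrative instances of the two prompt types (wording assumed for
%% illustration, not taken from the thesis):
%%   prefix prompt: "I need a train to Berlin. belief states: value = Berlin, slot = [z]"
%%     (the answer slot [z] comes last, suiting autoregressive LMs such as GPT-2)
%%   cloze prompt: "Berlin is the [z] of the trip."
%%     (the answer slot sits mid-sentence, suiting masked LMs such as BERT)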
\begin{frame} \frametitle{Motivation \& Objectives}
\begin{itemize}
\item Previous work \& their limitations
\begin{itemize}
\item \textsc{TOD-BERT} \parencite{wu2020tod-bert}
\begin{itemize}
\item Pre-trained BERT on 9 different task-oriented datasets
\item Fine-tuned for DST task as multi-class classification
\item Depends on the ontology of domains for predicting slot-values
\end{itemize}
\item \textsc{Soloist} \parencite{peng2021soloist}
\begin{itemize}
\item Pre-trained GPT-2 on two dialogue datasets
\item Fine-tuned to generate belief states as sequence of words
\item Performs poorly under low-resource settings
\end{itemize}
\end{itemize}
\item Research Objectives
\begin{itemize}
\item Can the dialog states be extracted from the PLM using prompts?
\item Can the prompt-based methods learn the DST task under low-resource settings without depending on the ontology of domains?
\item Compare prompt-based approach with the baseline model
\item Identify the drawbacks \& limitations of prompt-based approach
\item Can different multi-prompt techniques help improve the performance of DST task?
\end{itemize}
\end{itemize}
\end{frame}
\section{Methods}
\begin{frame} \frametitle{Dataset - MultiWOZ \parencite{budzianowski2018multiwoz}}
\begin{itemize}
\item MultiWOZ 2.1 \parencite{eric2019multiwoz} is used to benchmark the DST
\item Contains 10k dialogues across multiple domains
\item Each dialog $\rightarrow$ multiple turns $\rightarrow$ multiple \textsl{(slot, value)} pairs
\item Five domains are picked for few-shot experiments
\begin{itemize}
\item \textsl{Restaurant, Hotel, Attraction, Taxi, Train}
\end{itemize}
\item Six data splits are created to perform few-shot experiments
\begin{itemize}
\item Different proportions of dialogues in each split
\item All five domains are evenly distributed in each split
\end{itemize}
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.75\textwidth]{data_splits.png}
%% \caption{Terminology and notations in prompt learning}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Baseline (\textsc{Soloist})}
\begin{itemize}
\item \textsc{Soloist} \parencite{peng2021soloist} is the baseline model
\item Initialized with 12-layer GPT-2 language model
\item Pre-training step
\begin{itemize}
\item Pre-trained on two task-oriented dialogue datasets
\item Pre-trained model is publicly available
\end{itemize}
\item Fine-tuning step
\begin{itemize}
\item Fine-tuned on all MultiWOZ 2.1 data splits to perform the belief prediction task
\item Takes the dialog history as input and generates belief states as a sequence of words
\item \textsl{belief: $slot_1 = value_1; slot_2 = value_2; \ldots$}
\end{itemize}
\end{itemize}
\end{frame}
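%% A minimal sketch of the baseline's input/output serialization, reusing the
%% DST example from the introduction; the exact Soloist format may differ:
%%   input:  "user: plan a train trip to Berlin this Friday for two people"
%%   output: "belief: destination = Berlin; day = Friday; people = 2"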
\begin{frame} \frametitle{Prompt-based methods}
\begin{itemize}
\item \cite{yang2022prompt} proposed a prompt learning framework for DST
\item This approach does not depend on the ontology of domains
\item Two components: a \textsl{value-based prompt} and an \textsl{inverse prompt}
\item The value-based prompt fills belief-state values into prompts and generates the slots from the PLM
\item The inverse prompt is an auxiliary task that takes the slot generated by the value-based prompt and attempts to generate back the value
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.85\textwidth]{prompt_methods.png}
%% \caption{Terminology and notations in prompt learning}
\end{figure}
\end{frame}
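%% Illustrative instantiation of both components for the value "Berlin",
%% using the templates from the next slide (the slot name is assumed):
%%   value-based prompt: <dialog context> + "belief states: value = Berlin, slot ="
%%     the PLM generates the slot: "destination"
%%   inverse prompt: <dialog context> + "belief states: slot = destination, value ="
%%     the PLM should regenerate the value: "Berlin"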
\begin{frame} \frametitle{Prompt-based methods - Training}
\begin{table}[h!]
\centering
\begingroup
\setlength{\tabcolsep}{8pt} % Default value: 6pt
\renewcommand{\arraystretch}{1.1} % Default value: 1
\begin{tabular}{ll}
\hline
\multicolumn{1}{c}{\textbf{Type}} & \multicolumn{1}{c}{\textbf{Prompt templates}} \\
\hline
value-based prompt & belief states: value = [v], slot = [s] \\
inverse prompt & belief states: slot = [s], value = [v] \\
\hline
\end{tabular}
\endgroup
\end{table}
\begin{itemize}
\item The pre-trained \textsc{Soloist} model is fine-tuned with the prompting methods
\item All MultiWOZ data splits are used in the fine-tuning phase
\item Loss function for the value-based prompt
$$\mathcal{L}=-\sum_{t=1}^{|D|} \log P\left(s_{t} \mid c_{t}, f\left(v_{t}\right)\right)$$
\item Loss function for the inverse prompt
$$\tilde{\mathcal{L}}=-\sum_{t=1}^{|D|} \log P\left(v^{\prime}_{t} \mid c_{t}, I\left(s_{t}\right)\right)$$
\item Total loss: $\mathcal{L}^{*} = \mathcal{L} + w \cdot \tilde{\mathcal{L}}$
\begin{itemize}
\item Experiments are performed on different inverse prompt weights $w$
\end{itemize}
\end{itemize}
\end{frame}
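%% Reading the value-based loss for a single turn t (example values assumed):
%% with context c_t = "plan a train trip to Berlin" and filled template
%% f(v_t) = "belief states: value = Berlin, slot =", training maximizes
%% P(s_t = destination | c_t, f(v_t)); the inverse loss symmetrically
%% maximizes P(v'_t = Berlin | c_t, I(s_t)) and is weighted by w in the total loss.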
\begin{frame} \frametitle{Prompt-based methods - Testing}
\begin{itemize}
\item Testing slot generation
\begin{itemize}
\item During inference, only value-based prompts are used
\item Prompts are filled with values and given as input to PLM
\item The next word with the highest probability is taken as the generated slot
\item Rule-based approach for extracting value candidates
\end{itemize}
\item Rule-based Value Extraction:
\begin{itemize}
\item Stanford CoreNLP (via the Stanza toolkit \parencite{qi2020stanza}) is first used to extract POS tags
\item Adjectives \textsf{(JJ)} and adverbs \textsf{(RB)} are considered as possible values
\item A preceding negator `not' is also considered
\item All named entities are considered (names of places, times, days, numbers)
\item Custom regex NER rules; stop words and repeated values are filtered out
\end{itemize}
\end{itemize}
\begin{figure}
\centering
\includegraphics[width=0.72\textwidth]{corenlp.png}
\end{figure}
\end{frame}
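%% Walking the extraction rules over an assumed utterance:
%%   "I need a cheap hotel in the north"
%% POS tagging yields the adjective (JJ) "cheap" as a candidate value;
%% named entities yield the place "north"; a custom rule could add "hotel";
%% stop words and duplicates are filtered, leaving {cheap, hotel, north}.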
\begin{frame} \frametitle{Multi-prompt method (Prompt Ensemble)}
\begin{itemize}
\item Only a single value-based prompt is used in the previous experiments
\item Multiple prompts can be used together to improve the performance
\item Prompt ensembling uses multiple value-based prompts during training and inference to take advantage of different templates
\item Four hand-crafted prompt templates for value-based prompt
\end{itemize}
\begin{table}
\centering
\begin{tabular}{c l}
\hline
\multicolumn{2}{c}{\textbf{Prompt ensemble templates}}\\
\hline
$f_{1}$ & belief states: [v] = [s]\\
$f_{2}$ & [v] is the value of [s]\\
$f_{3}$ & [v] is of slot type [s]\\
$f_{4}$ & belief states: value = [v], slot = [s]\\
\hline
\end{tabular}
\end{table}
\begin{itemize}
\item A single model is trained with multiple prompts
\item The probability of generated slot over multiple prompt functions:
$$P\left(s_{t} \mid c_{t}\right)=\sum_{k=1}^{|K|} \alpha_{k} \cdot P\left(s_{t} \mid c_{t}, f_{k}\left(v_{t}\right)\right)$$
\end{itemize}
\end{frame}
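%% Worked instance of the ensemble score, assuming uniform weights
%% alpha_k = 1/4 over the four templates above and per-template slot
%% probabilities chosen for illustration:
%%   P(destination | c_t) = (0.6 + 0.5 + 0.7 + 0.4) / 4 = 0.55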
\begin{frame} \frametitle{Multi-prompt method (Prompt Augmentation)}
\begin{itemize}
\item Provides a few answered prompts that demonstrate to the PLM how the actual task is performed
\item Demonstration samples are hand-picked from the training data
\item Experiments are performed on two sets of demonstration samples
\begin{itemize}
\item Sample set 1: 8 demonstrations
\item Sample set 2: 5 demonstrations
\end{itemize}
\item Demonstrations are concatenated to the input during inference
\item The number of demonstration examples that can be used is bounded by the GPT-2 maximum input length of 1024 tokens
\end{itemize}
\begin{table}
\centering
\begingroup
\setlength{\tabcolsep}{2pt}
\begin{tabular}{ r l }
\hline
\multicolumn{2}{c}{\textbf{Demonstration learning}} \\
\hline
Book a cheap flight to Frankfurt. & \textit{Frankfurt} is of slot \textit{destination}\\
Plan a train trip to Berlin. & \textit{Berlin} is of slot \textit{destination}\\
Book a taxi to the University. & \textit{University} is of slot \textit{destination}\\
Book a train to Stuttgart. & \textit{Stuttgart} is of slot [s]\\
\hline
\end{tabular}
\endgroup
\end{table}
\end{frame}
\begin{frame} \frametitle{Evaluation Metrics}
\begin{itemize}
\item Joint Goal Accuracy (JGA)
\begin{itemize}
\item Standard evaluation metric for DST
\item A prediction is correct only if all predicted belief states match the ground truth
\item All slots and values must match exactly
\end{itemize}
\item Rule-based value extraction methods may extract irrelevant values
\item JGA* \parencite{yang2022prompt}
\begin{itemize}
\item To exclude the influence of wrongly extracted values, JGA* is used
\item JGA* - Joint Goal Accuracy is computed only for the belief states where the values are extracted correctly
\end{itemize}
\end{itemize}
\end{frame}
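%% Worked contrast between the two metrics (counts assumed): over 4 test
%% turns, suppose every (slot, value) pair is exactly correct in 3 turns,
%% giving JGA = 3/4 = 75%. If the one failing turn failed only because value
%% extraction missed a value, JGA* excludes that turn and scores 3/3 = 100%.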
\section{Results}
\begin{frame} \frametitle{Baseline (\textsc{Soloist}) results}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{baseline_results.png}
\end{figure}
\end{frame}
\begin{frame} \frametitle{ Prompt-based methods}
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{prompt_results.png}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Prompt Ensemble results}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{ensemble_results.png}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Prompt Augmentation results}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{demonstration_results.png}
\end{figure}
\end{frame}
\begin{frame} \frametitle{Comparison of results}
\begin{figure}
\centering
\includegraphics[width=0.83\textwidth]{comparison_results.png}
\end{figure}
\end{frame}
\section{Discussion}
\begin{frame} \frametitle{Analysis of \textsc{Soloist} model}
\begin{block}{Example of wrong belief state prediction}
\textsf{USER:} I need an expensive place to eat in the west.\\
\textsf{SYSTEM:} Is there a specific type of food you would like?\\
\textsf{USER:} yes, i would like eat indian food.\\
\textbf{True states:} (area, west),(food, indian),(pricerange, expensive)\\
\textbf{Generated:} \textsl{(area, west),(food, indian),(pricerange, {\color{red} cheap}),({\color{red}area, east})}
\end{block}
\begin{itemize}
\item Open-ended generation
\item Susceptible to generating random slot-value pairs
\item Repeated slot-value generations
\item From the above example:
\begin{itemize}
\item slot \textsl{area} is repeated with a different value
\item value for slot \textsl{pricerange} is incorrect
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Analysis of prompt-based methods}
\begin{block}{Incorrect slot generations by value-based prompt}
\textsf{USER:} I need to be picked up from pizza hut city centre after 04:30\\
\textbf{True states:} (departure, pizza hut city centre), (leave, 04:30)\\
\textbf{Generated:} \textsl{({\color{red}destination}, pizza hut city centre), ({\color{red}arrive}, 04:30)}
\end{block}
\begin{itemize}
\item Incorrect slots generated under low-resource splits {\small (i.e., \textsl{5-dpd, 10-dpd})}
\item Model struggled to distinguish between slots:
\begin{itemize}
\item \textsl{departure vs destination}
\item \textsl{leave vs arrive}
\end{itemize}
\item Possibly due to limited training data
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Limitations of Value-based prompt}
\begin{block}{Repeated Values in Belief States}
\textsf{USER:} hi, can you help me find a 3 star place to stay?\\
\textsf{SYSTEM:} Is there a particular area or price range you prefer?\\
\textsf{USER:} how about a place in centre of town that is of type hotel.\\
\textsf{SYSTEM:} how long would you like to stay, and how many people?\\
\textsf{USER:} Ill arrive on saturday and stay for 3 nights with 3 people.\\
\textbf{True states:} (area, centre), (stars, \underline{3}), (type, hotel), (day, saturday), \\(stay, \underline{3}), (people, \underline{3})
\end{block}
\begin{itemize}
\item User requirements may have repeated values in belief states
\item Value for \textsl{stars}, \textsl{stay}, and \textsl{people} is the same
\item Value-based prompt can only generate one slot for all the repeated values
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Error Analysis of Value Extraction}
\begin{block}{Problems with Value Extraction}
\textsf{USER:} I want a place to stay that has free wifi and free parking.\\
\textsf{SYSTEM:} do you have a preference for area or price range?\\
\textsf{USER:} I dont have a preference. I want a hotel not guesthouse.\\
\textbf{True states:} (area, \underline{dont care}), (internet, \underline{yes}), (parking, \underline{yes}), \\(price, \underline{dont care}), (type, hotel)\\
\textbf{Extracted Values:} \textsl{free}, \textsl{hotel}\\
\hrulefill \\
\textsf{USER:} I kind of need help finding a nice hotel in the north part of town.\\
\textbf{True states:} (area, north), (price, expensive), (type, hotel)\\
\textbf{Extracted Values:} \textsl{\color{red}kind}, \textsl{\color{red}nice}, \textsl{hotel}, \textsl{north}
\end{block}
\begin{itemize}
\item Value Extraction on test split
\begin{itemize}
\item Value-level accuracy of \textsl{79\%} over all values
\item Turn-level accuracy of \textsl{49\%}
\end{itemize}
\item Drawbacks of extracting values from POS tags
\end{itemize}
\end{frame}
\section{Conclusion}
\begin{frame} \frametitle{Conclusion}
\begin{itemize}
\item Prompt-based methods learned the DST task efficiently under low-resource few-shot settings without relying on the ontology.
\item Prompt-based methods significantly outperformed the baseline \textsc{Soloist} model under low-resource settings.
\item The prompt-based approach still has some limitations (repeated values, value-extraction errors)
\item The prompt ensemble model achieved only minor improvements over the single value-based prompt
\item The performance of prompt augmentation is limited by the small number of demonstration examples that fit into the input
\end{itemize}
\end{frame}
\begin{frame} \frametitle{Future work}
\begin{itemize}
\item Explore automated prompt search methods for choosing the right prompts instead of manually creating the templates
\item Improve the value extraction methods
\begin{itemize}
\item Combination of text summarization and semantic tagging
\end{itemize}
\item Can bigger language models perform better on the DST task with prompting?
\end{itemize}
\end{frame}
\section{}
\begin{frame}[plain,noframenumbering,allowframebreaks]
\frametitle{References}
\printbibliography[heading=none]
\end{frame}
\section{}
\begin{frame}
\centering \Large
\emph{Thanks for your time!}
\end{frame}
\end{document}
%% --- END OF FILE

Binary files not shown (2 files).