@article{yang2022prompt,
  author     = {Yuting Yang and Wenqiang Lei and Juan Cao and Jintao Li and Tat{-}Seng Chua},
  title      = {Prompt Learning for Few-Shot Dialogue State Tracking},
  journal    = {CoRR},
  volume     = {abs/2201.05780},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.05780},
  eprinttype = {arXiv},
  eprint     = {2201.05780}
}

@article{liu2021ppp,
  author     = {Pengfei Liu and Weizhe Yuan and Jinlan Fu and Zhengbao Jiang and Hiroaki Hayashi and Graham Neubig},
  title      = {Pre-train, Prompt, and Predict: {A} Systematic Survey of Prompting Methods in Natural Language Processing},
  journal    = {CoRR},
  volume     = {abs/2107.13586},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.13586},
  eprinttype = {arXiv},
  eprint     = {2107.13586}
}

@article{brown2020gpt3,
  author     = {Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert{-}Voss and Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and Alec Radford and Ilya Sutskever and Dario Amodei},
  title      = {Language Models are Few-Shot Learners},
  journal    = {CoRR},
  volume     = {abs/2005.14165},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.14165},
  eprinttype = {arXiv},
  eprint     = {2005.14165}
}

@article{madotto2021fsb,
  author     = {Andrea Madotto and Zhaojiang Lin and Genta Indra Winata and Pascale Fung},
  title      = {Few-Shot Bot: Prompt-Based Learning for Dialogue Systems},
  journal    = {CoRR},
  volume     = {abs/2110.08118},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.08118},
  eprinttype = {arXiv},
  eprint     = {2110.08118}
}

@article{radford2018gpt,
  author    = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya and others},
  title     = {Improving Language Understanding by Generative Pre-Training},
  year      = {2018},
  publisher = {OpenAI}
}

@article{radford2019gpt2,
  author  = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya and others},
  title   = {Language Models are Unsupervised Multitask Learners},
  journal = {OpenAI blog},
  volume  = {1},
  number  = {8},
  pages   = {9},
  year    = {2019}
}

@inproceedings{devlin2019bert,
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N19-1423},
  doi       = {10.18653/v1/N19-1423},
  pages     = {4171--4186}
}

@article{peng2021soloist,
  title     = {{SOLOIST}: Building Task Bots at Scale with Transfer Learning and Machine Teaching},
  author    = {Peng, Baolin and Li, Chunyuan and Li, Jinchao and Shayandeh, Shahin and Liden, Lars and Gao, Jianfeng},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  year      = {2021},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/2021.tacl-1.49},
  doi       = {10.1162/tacl_a_00399},
  pages     = {807--824}
}

@article{lee2021sdp,
  author     = {Chia{-}Hsuan Lee and Hao Cheng and Mari Ostendorf},
  title      = {Dialogue State Tracking with a Language Model using Schema-Driven Prompting},
  journal    = {CoRR},
  volume     = {abs/2109.07506},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.07506},
  eprinttype = {arXiv},
  eprint     = {2109.07506}
}

@article{eric2019multiwoz,
  author     = {Mihail Eric and Rahul Goel and Shachi Paul and Abhishek Sethi and Sanchit Agarwal and Shuyang Gao and Dilek Hakkani{-}T{\"{u}}r},
  title      = {MultiWOZ 2.1: Multi-Domain Dialogue State Corrections and State Tracking Baselines},
  journal    = {CoRR},
  volume     = {abs/1907.01669},
  year       = {2019},
  url        = {http://arxiv.org/abs/1907.01669},
  eprinttype = {arXiv},
  eprint     = {1907.01669}
}

@inproceedings{budzianowski2018multiwoz,
  title     = {{M}ulti{WOZ} - A Large-Scale Multi-Domain {W}izard-of-{O}z Dataset for Task-Oriented Dialogue Modelling},
  author    = {Budzianowski, Pawe{\l} and Wen, Tsung-Hsien and Tseng, Bo-Hsiang and Casanueva, I{\~n}igo and Ultes, Stefan and Ramadan, Osman and Ga{\v{s}}i{\'c}, Milica},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  month     = oct # "-" # nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D18-1547},
  doi       = {10.18653/v1/D18-1547},
  pages     = {5016--5026}
}

@inproceedings{min2020dsi,
  title     = {Dialogue State Induction Using Neural Latent Variable Models},
  author    = {Min, Qingkai and Qin, Libo and Teng, Zhiyang and Liu, Xiao and Zhang, Yue},
  booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, {IJCAI-20}},
  editor    = {Christian Bessiere},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {3845--3852},
  month     = {7},
  year      = {2020},
  note      = {Main track},
  url       = {https://doi.org/10.24963/ijcai.2020/532},
  doi       = {10.24963/ijcai.2020/532}
}

@inproceedings{gao2021lmbff,
  title     = {Making Pre-trained Language Models Better Few-shot Learners},
  author    = {Gao, Tianyu and Fisch, Adam and Chen, Danqi},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.acl-long.295},
  doi       = {10.18653/v1/2021.acl-long.295},
  pages     = {3816--3830}
}

@inproceedings{cui2021template,
  title     = {Template-Based Named Entity Recognition Using {BART}},
  author    = {Cui, Leyang and Wu, Yu and Liu, Jian and Yang, Sen and Zhang, Yue},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.findings-acl.161},
  doi       = {10.18653/v1/2021.findings-acl.161},
  pages     = {1835--1845}
}

@inproceedings{schick2021pet,
  title     = {Few-Shot Text Generation with Natural Language Instructions},
  author    = {Schick, Timo and Sch{\"u}tze, Hinrich},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.emnlp-main.32},
  doi       = {10.18653/v1/2021.emnlp-main.32},
  pages     = {390--402}
}