From 13180169ed84349824f526477e1250f9b4d0cfe4 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Thu, 24 Nov 2022 13:08:06 +0100 Subject: [PATCH] added some slot value corrections & different num of epochs in training scripts --- .../100-dpd/train.soloist.json | 8 +- .../250-dpd/train.soloist.json | 22 +- data/prompt-learning/test/test.soloist.json | 517 ++++++++---------- data/prompt-learning/valid/valid.soloist.json | 432 +++++++-------- prompt-learning/prompt_utils.py | 2 +- prompt-learning/train_prompting.sh | 15 +- utils/corenlp.py | 2 +- utils/create_dataset.py | 4 +- utils/regexner.rules | 12 +- 9 files changed, 484 insertions(+), 530 deletions(-) diff --git a/data/prompt-learning/100-dpd/train.soloist.json b/data/prompt-learning/100-dpd/train.soloist.json index 0e0f16a..48b2e52 100644 --- a/data/prompt-learning/100-dpd/train.soloist.json +++ b/data/prompt-learning/100-dpd/train.soloist.json @@ -14879,7 +14879,7 @@ "taxi" ], "belief_states": [ - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -14892,7 +14892,7 @@ "taxi" ], "belief_states": [ - "departure = saint catharines college", + "departure = saint catherines college", "arrive = 01:15" ] }, @@ -14909,7 +14909,7 @@ ], "belief_states": [ "destination = loch fyne", - "departure = saint catharines college", + "departure = saint catherines college", "arrive = 01:15" ] }, @@ -14928,7 +14928,7 @@ ], "belief_states": [ "destination = loch fyne", - "departure = saint catharines college", + "departure = saint catherines college", "arrive = 01:15" ] }, diff --git a/data/prompt-learning/250-dpd/train.soloist.json b/data/prompt-learning/250-dpd/train.soloist.json index 33cbf55..0053f22 100644 --- a/data/prompt-learning/250-dpd/train.soloist.json +++ b/data/prompt-learning/250-dpd/train.soloist.json @@ -16037,7 +16037,7 @@ ], "belief_states": [ "destination = pizza hut fen ditton", - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ 
-16056,7 +16056,7 @@ "belief_states": [ "leave = 11:45", "destination = pizza hut fen ditton", - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -16077,7 +16077,7 @@ "belief_states": [ "leave = 11:45", "destination = pizza hut fen ditton", - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -32656,7 +32656,7 @@ "taxi" ], "belief_states": [ - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -32669,7 +32669,7 @@ "taxi" ], "belief_states": [ - "departure = saint catharines college", + "departure = saint catherines college", "arrive = 01:15" ] }, @@ -32686,7 +32686,7 @@ ], "belief_states": [ "destination = loch fyne", - "departure = saint catharines college", + "departure = saint catherines college", "arrive = 01:15" ] }, @@ -32705,7 +32705,7 @@ ], "belief_states": [ "destination = loch fyne", - "departure = saint catharines college", + "departure = saint catherines college", "arrive = 01:15" ] }, @@ -38563,7 +38563,7 @@ "taxi" ], "belief_states": [ - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -38577,7 +38577,7 @@ ], "belief_states": [ "destination = rosas bed and breakfast", - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -38594,7 +38594,7 @@ "belief_states": [ "leave = 13:00", "destination = rosas bed and breakfast", - "departure = saint catharines college" + "departure = saint catherines college" ] }, { @@ -38613,7 +38613,7 @@ "belief_states": [ "leave = 13:00", "destination = rosas bed and breakfast", - "departure = saint catharines college" + "departure = saint catherines college" ] }, { diff --git a/data/prompt-learning/test/test.soloist.json b/data/prompt-learning/test/test.soloist.json index 260885b..5ee6a08 100644 --- a/data/prompt-learning/test/test.soloist.json +++ b/data/prompt-learning/test/test.soloist.json @@ -193,8 +193,8 @@ "3", "4", "2", - 
"saturday", "1", + "saturday", "guesthouse" ], "belief_states": [ @@ -229,8 +229,8 @@ "3", "4", "2", - "saturday", "1", + "saturday", "guesthouse" ], "belief_states": [ @@ -251,7 +251,7 @@ "hotel" ], "values": [ - "city centre north" + "centre north" ], "belief_states": [ "name = city centre north b and b" @@ -270,7 +270,7 @@ "1", "5", "friday", - "city centre north" + "centre north" ], "belief_states": [ "name = city centre north b and b", @@ -294,7 +294,7 @@ "1", "5", "friday", - "city centre north" + "centre north" ], "belief_states": [ "name = city centre north b and b", @@ -981,7 +981,7 @@ "values": [ "09:00", "wagamama", - "theatre" + "mumford theatre" ], "belief_states": [ "destination = mumford theatre", @@ -1427,8 +1427,8 @@ ], "values": [ "13:30", - "sunday", "8", + "sunday", "cambridge", "ely" ], @@ -1455,8 +1455,8 @@ ], "values": [ "13:30", - "sunday", "8", + "sunday", "cambridge", "ely" ], @@ -1485,8 +1485,8 @@ ], "values": [ "13:30", - "sunday", "8", + "sunday", "cambridge", "ely" ], @@ -1582,8 +1582,8 @@ "dont care", "cheap", "4", - "wednesday", "5", + "wednesday", "guesthouse" ], "belief_states": [ @@ -1618,8 +1618,8 @@ "dont care", "cheap", "4", - "wednesday", "5", + "wednesday", "guesthouse" ], "belief_states": [ @@ -1894,8 +1894,8 @@ "values": [ "expensive", "2", - "saturday", "11:45", + "saturday", "the peking restaurant", "chinese", "south" @@ -1929,8 +1929,8 @@ "values": [ "expensive", "2", - "saturday", "11:45", + "saturday", "the peking restaurant", "chinese", "south" @@ -1966,9 +1966,9 @@ "values": [ "expensive", "2", - "saturday", "11:45", "excellent", + "saturday", "the peking restaurant", "chinese", "south" @@ -2091,8 +2091,8 @@ "values": [ "cheap", "1", - "sunday", "free", + "sunday", "hotel", "north" ], @@ -2127,8 +2127,8 @@ "values": [ "cheap", "1", - "sunday", "free", + "sunday", "guesthouse", "north" ], @@ -2165,8 +2165,8 @@ "values": [ "cheap", "1", - "sunday", "free", + "sunday", "guesthouse", "north" ], @@ -2544,7 +2544,6 @@ 
"free", "1", "4", - "tuesday", "wednesday", "guesthouse", "centre" @@ -2578,7 +2577,6 @@ "free", "1", "4", - "tuesday", "wednesday", "guesthouse", "centre" @@ -2619,8 +2617,8 @@ "train" ], "values": [ - "friday", "21:15", + "friday", "kings lynn", "cambridge" ], @@ -2643,9 +2641,9 @@ "train" ], "values": [ - "friday", "21:15", "2", + "friday", "kings lynn", "cambridge" ], @@ -2671,9 +2669,9 @@ "train" ], "values": [ - "friday", "21:15", "2", + "friday", "kings lynn", "cambridge" ], @@ -2733,8 +2731,8 @@ "values": [ "expensive", "4", - "wednesday", "5", + "wednesday", "centre" ], "belief_states": [ @@ -2763,8 +2761,8 @@ "values": [ "expensive", "4", - "wednesday", "5", + "wednesday", "centre" ], "belief_states": [ @@ -2795,8 +2793,8 @@ "values": [ "expensive", "4", - "wednesday", "5", + "wednesday", "centre" ], "belief_states": [ @@ -2829,8 +2827,8 @@ "values": [ "expensive", "4", - "wednesday", "5", + "wednesday", "centre" ], "belief_states": [ @@ -2966,8 +2964,8 @@ "1", "5", "14:00", - "thursday", "13:00", + "thursday", "cambridge chop house", "british", "centre" @@ -3003,8 +3001,8 @@ "1", "5", "14:00", - "thursday", "13:00", + "thursday", "cambridge chop house", "british", "centre" @@ -3042,8 +3040,8 @@ "1", "5", "14:00", - "thursday", "13:00", + "thursday", "cambridge chop house", "british", "centre" @@ -3844,8 +3842,8 @@ "train" ], "values": [ - "friday", "15:15", + "friday", "norwich", "cambridge" ], @@ -3872,8 +3870,8 @@ "train" ], "values": [ - "friday", "15:15", + "friday", "norwich", "cambridge" ], @@ -3903,8 +3901,8 @@ "train" ], "values": [ - "friday", "15:15", + "friday", "norwich", "cambridge" ], @@ -3963,8 +3961,8 @@ "values": [ "favorite", "1", - "saturday", "12:45", + "saturday", "centre", "italian" ], @@ -3993,9 +3991,9 @@ "values": [ "favorite", "1", - "saturday", "12:45", "terrific", + "saturday", "centre", "italian" ], @@ -4026,9 +4024,9 @@ "values": [ "favorite", "1", - "saturday", "12:45", "terrific", + "saturday", "centre", "italian" ], @@ 
-4061,9 +4059,9 @@ "values": [ "favorite", "1", - "saturday", "12:45", "terrific", + "saturday", "centre", "italian" ], @@ -4152,8 +4150,8 @@ "free", "cheap", "1", - "monday", - "5" + "5", + "monday" ], "belief_states": [ "area = dont care", @@ -4182,8 +4180,8 @@ "free", "cheap", "1", - "monday", - "5" + "5", + "monday" ], "belief_states": [ "area = dont care", @@ -4216,9 +4214,9 @@ "free", "cheap", "1", - "monday", "5", - "3" + "3", + "monday" ], "belief_states": [ "name = alexander bed and breakfast", @@ -4254,9 +4252,9 @@ "free", "cheap", "1", - "monday", "5", - "3" + "3", + "monday" ], "belief_states": [ "area = dont care", @@ -4318,8 +4316,8 @@ "values": [ "high", "1", - "saturday", "19:15", + "saturday", "indian", "west" ], @@ -4349,8 +4347,8 @@ "values": [ "high", "1", - "saturday", "19:15", + "saturday", "indian", "west" ], @@ -4383,9 +4381,7 @@ ], "values": [ "the oak bistro", - "lammas land", - "fen causeway", - "park" + "sheeps green and lammas land park fen causeway" ], "belief_states": [ "destination = the oak bistro", @@ -4406,9 +4402,7 @@ "values": [ "16:45", "the oak bistro", - "lammas land", - "fen causeway", - "park" + "sheeps green and lammas land park fen causeway" ], "belief_states": [ "destination = the oak bistro", @@ -4432,9 +4426,7 @@ "values": [ "16:45", "the oak bistro", - "lammas land", - "fen causeway", - "park" + "sheeps green and lammas land park fen causeway" ], "belief_states": [ "destination = the oak bistro", @@ -4515,8 +4507,8 @@ ], "values": [ "17:00", - "friday", "7", + "friday", "norwich" ], "belief_states": [ @@ -4543,8 +4535,8 @@ ], "values": [ "17:00", - "friday", "7", + "friday", "norwich" ], "belief_states": [ @@ -4573,8 +4565,8 @@ ], "values": [ "17:00", - "friday", "7", + "friday", "norwich" ], "belief_states": [ @@ -4633,8 +4625,8 @@ ], "values": [ "09:15", - "thursday", "7", + "thursday", "stansted airport", "cambridge" ], @@ -4661,8 +4653,8 @@ ], "values": [ "09:15", - "thursday", "7", + "thursday", "stansted 
airport", "cambridge" ], @@ -4691,8 +4683,8 @@ ], "values": [ "09:15", - "thursday", "7", + "thursday", "stansted airport", "cambridge" ], @@ -5094,8 +5086,8 @@ "moderate", "6", "12:00", - "tuesday", "forward", + "tuesday", "the varsity restaurant", "centre", "international" @@ -5129,8 +5121,8 @@ "moderate", "6", "12:00", - "tuesday", "forward", + "tuesday", "the varsity restaurant", "centre", "international" @@ -5191,8 +5183,8 @@ "train" ], "values": [ - "saturday", "15:00", + "saturday", "cambridge", "london liverpool street" ], @@ -5217,8 +5209,8 @@ "train" ], "values": [ - "saturday", "15:00", + "saturday", "cambridge", "london liverpool street" ], @@ -5577,9 +5569,9 @@ ], "values": [ "expensive", - "wednesday", "14:00", "5", + "wednesday", "east", "british" ], @@ -5608,10 +5600,10 @@ ], "values": [ "expensive", - "wednesday", "14:00", "5", "13:00", + "wednesday", "east", "british" ], @@ -5642,10 +5634,10 @@ ], "values": [ "expensive", - "wednesday", "14:00", "5", "13:00", + "wednesday", "east", "british" ], @@ -5667,8 +5659,8 @@ "train" ], "values": [ - "sunday", - "18:00" + "18:00", + "sunday" ], "belief_states": [ "day = sunday", @@ -5685,8 +5677,8 @@ "train" ], "values": [ - "sunday", "18:00", + "sunday", "cambridge", "leicester" ], @@ -5709,9 +5701,9 @@ "train" ], "values": [ - "sunday", "18:00", "7", + "sunday", "cambridge", "leicester" ], @@ -5737,9 +5729,9 @@ "train" ], "values": [ - "sunday", "18:00", "7", + "sunday", "cambridge", "leicester" ], @@ -5767,9 +5759,9 @@ "train" ], "values": [ - "sunday", "18:00", "7", + "sunday", "cambridge", "leicester" ], @@ -5799,9 +5791,9 @@ "train" ], "values": [ - "sunday", "18:00", "7", + "sunday", "cambridge", "leicester" ], @@ -5918,9 +5910,9 @@ "values": [ "expensive", "4", - "sunday", "3", "2", + "sunday", "gonville", "hotel", "centre" @@ -5955,9 +5947,9 @@ "values": [ "expensive", "4", - "sunday", "3", "2", + "sunday", "gonville", "hotel", "centre" @@ -5998,8 +5990,8 @@ "train" ], "values": [ - "friday", 
"14:45", + "friday", "ely", "cambridge" ], @@ -6022,10 +6014,10 @@ "train" ], "values": [ - "friday", "14:45", "1", "3", + "friday", "ely", "cambridge" ], @@ -6052,10 +6044,10 @@ "train" ], "values": [ - "friday", "14:45", "1", "3", + "friday", "ely", "cambridge" ], @@ -6105,8 +6097,7 @@ ], "values": [ "12:45", - "cambridge", - "museum" + "cambridge and county folk museum" ], "belief_states": [ "departure = cambridge and county folk museum", @@ -6128,9 +6119,8 @@ ], "values": [ "12:45", - "cambridge", - "trinity college", - "museum" + "cambridge and county folk museum", + "trinity college" ], "belief_states": [ "destination = trinity college", @@ -6346,8 +6336,8 @@ "values": [ "expensive", "8", - "friday", "12:00", + "friday", "fitzbillies restaurant", "british", "centre" @@ -6378,9 +6368,9 @@ "values": [ "expensive", "8", - "friday", "12:00", "11:00", + "friday", "fitzbillies restaurant", "british", "centre" @@ -6413,9 +6403,9 @@ "values": [ "expensive", "8", - "friday", "12:00", "11:00", + "friday", "fitzbillies restaurant", "british", "centre" @@ -6504,8 +6494,8 @@ "values": [ "expensive", "4", - "monday", "15:15", + "monday", "centre", "turkish" ], @@ -6536,7 +6526,6 @@ "values": [ "expensive", "4", - "monday", "15:15", "saturday", "centre", @@ -6571,9 +6560,8 @@ "values": [ "expensive", "4", - "monday", "15:15", - "saturday", + "monday", "centre", "turkish" ], @@ -6608,7 +6596,6 @@ "values": [ "expensive", "4", - "monday", "15:15", "saturday", "centre", @@ -6647,7 +6634,6 @@ "values": [ "expensive", "4", - "monday", "15:15", "saturday", "the meze bar", @@ -6689,7 +6675,6 @@ "values": [ "expensive", "4", - "monday", "15:15", "saturday", "the meze bar", @@ -7015,7 +7000,7 @@ "hotel" ], "values": [ - "hotel" + "lensfield hotel" ], "belief_states": [ "name = lensfield hotel" @@ -7034,7 +7019,7 @@ "7", "3", "sunday", - "hotel" + "lensfield hotel" ], "belief_states": [ "name = lensfield hotel", @@ -7058,7 +7043,7 @@ "7", "3", "sunday", - "hotel" + "lensfield hotel" 
], "belief_states": [ "name = lensfield hotel", @@ -7114,8 +7099,8 @@ ], "values": [ "2", - "sunday", "13:30", + "sunday", "centre", "chinese" ], @@ -7142,8 +7127,8 @@ ], "values": [ "2", - "sunday", "13:30", + "sunday", "centre", "chinese" ], @@ -7173,8 +7158,8 @@ ], "values": [ "2", - "sunday", "13:30", + "sunday", "centre", "chinese" ], @@ -7207,9 +7192,9 @@ ], "values": [ "2", - "sunday", "13:30", "12:30", + "sunday", "centre", "chinese" ], @@ -7244,9 +7229,9 @@ ], "values": [ "2", - "sunday", "13:30", "12:30", + "sunday", "centre", "chinese" ], @@ -7371,9 +7356,9 @@ "values": [ "free", "4", - "wednesday", "1", "5", + "wednesday", "hotel", "east" ], @@ -7404,9 +7389,9 @@ "values": [ "free", "4", - "wednesday", "1", "5", + "wednesday", "hotel", "east" ], @@ -7429,8 +7414,7 @@ "taxi" ], "values": [ - "ruskin", - "gallery" + "ruskin gallery" ], "belief_states": [ "destination = ruskin gallery" @@ -7447,9 +7431,8 @@ ], "values": [ "07:15", - "ruskin", - "saffron brasserie", - "gallery" + "ruskin gallery", + "saffron brasserie" ], "belief_states": [ "leave = 07:15", @@ -7470,9 +7453,8 @@ ], "values": [ "07:15", - "ruskin", - "saffron brasserie", - "gallery" + "ruskin gallery", + "saffron brasserie" ], "belief_states": [ "leave = 07:15", @@ -7539,8 +7521,8 @@ "1", "4", "20:00", - "wednesday", "19:00", + "wednesday", "centre" ], "belief_states": [ @@ -7570,8 +7552,8 @@ "1", "4", "20:00", - "wednesday", "19:00", + "wednesday", "centre" ], "belief_states": [ @@ -7603,8 +7585,8 @@ "1", "4", "20:00", - "wednesday", "19:00", + "wednesday", "centre" ], "belief_states": [ @@ -8893,8 +8875,8 @@ ], "values": [ "03:00", - "wednesday", "14:00", + "wednesday", "cambridge", "london kings cross" ], @@ -8922,9 +8904,9 @@ ], "values": [ "03:00", - "wednesday", "14:00", "1", + "wednesday", "cambridge", "london kings cross" ], @@ -8956,9 +8938,9 @@ ], "values": [ "03:00", - "wednesday", "14:00", "1", + "wednesday", "cambridge", "london kings cross" ], @@ -9027,8 +9009,8 @@ "expensive", 
"3", "14:00", - "saturday", "13:00", + "saturday", "chinese" ], "belief_states": [ @@ -9057,8 +9039,8 @@ "expensive", "3", "14:00", - "saturday", "13:00", + "saturday", "chinese" ], "belief_states": [ @@ -9089,8 +9071,8 @@ "expensive", "3", "14:00", - "saturday", "13:00", + "saturday", "chinese" ], "belief_states": [ @@ -9151,8 +9133,8 @@ "values": [ "expensive", "8", - "wednesday", "15:30", + "wednesday", "centre" ], "belief_states": [ @@ -9181,8 +9163,8 @@ "values": [ "expensive", "8", - "wednesday", "15:30", + "wednesday", "centre" ], "belief_states": [ @@ -9213,8 +9195,8 @@ "values": [ "expensive", "8", - "wednesday", "15:30", + "wednesday", "centre" ], "belief_states": [ @@ -9247,8 +9229,8 @@ "values": [ "expensive", "8", - "wednesday", "15:30", + "wednesday", "centre" ], "belief_states": [ @@ -9269,6 +9251,7 @@ "hotel" ], "values": [ + "warkworth house", "hotel" ], "belief_states": [ @@ -9286,6 +9269,7 @@ ], "values": [ "tuesday", + "warkworth house", "hotel" ], "belief_states": [ @@ -9305,8 +9289,9 @@ "hotel" ], "values": [ - "tuesday", "3", + "tuesday", + "warkworth house", "hotel" ], "belief_states": [ @@ -9329,9 +9314,10 @@ "hotel" ], "values": [ - "tuesday", "3", "1", + "tuesday", + "warkworth house", "hotel" ], "belief_states": [ @@ -9357,9 +9343,10 @@ "hotel" ], "values": [ - "tuesday", "3", "1", + "tuesday", + "warkworth house", "hotel" ], "belief_states": [ @@ -9498,8 +9485,8 @@ "train" ], "values": [ - "friday", - "18:30" + "18:30", + "friday" ], "belief_states": [ "leave = 18:30", @@ -9516,8 +9503,8 @@ "train" ], "values": [ - "friday", "18:30", + "friday", "stevenage", "cambridge" ], @@ -9540,9 +9527,9 @@ "train" ], "values": [ - "friday", "18:30", "8", + "friday", "stevenage", "cambridge" ], @@ -9568,9 +9555,9 @@ "train" ], "values": [ - "friday", "18:30", "8", + "friday", "stevenage", "cambridge" ], @@ -9698,8 +9685,8 @@ "train" ], "values": [ - "saturday", "12:30", + "saturday", "cambridge", "stevenage" ], @@ -9724,8 +9711,8 @@ "train" ], 
"values": [ - "saturday", "12:30", + "saturday", "cambridge", "stevenage" ], @@ -9752,8 +9739,8 @@ "train" ], "values": [ - "saturday", "12:30", + "saturday", "cambridge", "stevenage" ], @@ -9783,8 +9770,8 @@ "train" ], "values": [ - "saturday", "12:30", + "saturday", "cambridge", "stevenage" ], @@ -10048,8 +10035,8 @@ "confused", "ahead", "4", - "tuesday", "3", + "tuesday", "guesthouse" ], "belief_states": [ @@ -10088,8 +10075,8 @@ "confused", "ahead", "4", - "tuesday", "3", + "tuesday", "guesthouse" ], "belief_states": [ @@ -10130,8 +10117,8 @@ "confused", "ahead", "4", - "tuesday", "3", + "tuesday", "guesthouse" ], "belief_states": [ @@ -10174,8 +10161,8 @@ "confused", "ahead", "4", - "tuesday", "3", + "tuesday", "guesthouse" ], "belief_states": [ @@ -10220,8 +10207,8 @@ "confused", "ahead", "4", - "tuesday", "3", + "tuesday", "guesthouse" ], "belief_states": [ @@ -10322,7 +10309,6 @@ "moderate", "1", "4", - "friday", "wednesday", "hotel" ], @@ -10358,7 +10344,6 @@ "moderate", "1", "4", - "friday", "wednesday", "hotel" ], @@ -10396,7 +10381,6 @@ "moderate", "1", "4", - "friday", "wednesday", "hotel" ], @@ -10819,8 +10803,8 @@ "values": [ "free", "2", - "thursday", "picky", + "thursday", "guesthouse", "north" ], @@ -10854,7 +10838,6 @@ "values": [ "free", "2", - "thursday", "picky", "wednesday", "guesthouse", @@ -10893,7 +10876,6 @@ "values": [ "free", "2", - "thursday", "picky", "wednesday", "guesthouse", @@ -10934,7 +10916,6 @@ "values": [ "free", "2", - "thursday", "picky", "wednesday", "guesthouse", @@ -10977,7 +10958,6 @@ "values": [ "free", "2", - "thursday", "picky", "wednesday", "guesthouse", @@ -11022,8 +11002,8 @@ "train" ], "values": [ - "friday", "12:15", + "friday", "leicester" ], "belief_states": [ @@ -11044,8 +11024,8 @@ "train" ], "values": [ - "friday", "12:15", + "friday", "leicester", "cambridge" ], @@ -11070,9 +11050,9 @@ "train" ], "values": [ - "friday", "12:15", "7", + "friday", "leicester", "cambridge" ], @@ -11100,9 +11080,9 @@ "train" ], 
"values": [ - "friday", "12:15", "7", + "friday", "leicester", "cambridge" ], @@ -11402,7 +11382,8 @@ ], "values": [ "13:45", - "royal spice" + "royal spice", + "kirkwood house" ], "belief_states": [ "leave = 13:45", @@ -11423,7 +11404,8 @@ ], "values": [ "13:45", - "royal spice" + "royal spice", + "kirkwood house" ], "belief_states": [ "leave = 13:45", @@ -11508,8 +11490,8 @@ "values": [ "moderate", "4", - "saturday", "16:30", + "saturday", "asian oriental", "centre" ], @@ -11541,8 +11523,8 @@ "values": [ "moderate", "4", - "saturday", "16:30", + "saturday", "asian oriental", "centre" ], @@ -11722,8 +11704,8 @@ "values": [ "free", "3", - "saturday", "2", + "saturday", "hotel" ], "belief_states": [ @@ -11758,9 +11740,9 @@ "values": [ "free", "3", - "saturday", "2", "1", + "saturday", "hotel" ], "belief_states": [ @@ -11797,9 +11779,9 @@ "values": [ "free", "3", - "saturday", "2", "1", + "saturday", "hotel" ], "belief_states": [ @@ -12265,8 +12247,8 @@ ], "values": [ "5", - "wednesday", "19:45", + "wednesday", "saigon city" ], "belief_states": [ @@ -12289,8 +12271,8 @@ ], "values": [ "5", - "wednesday", "19:45", + "wednesday", "saigon city" ], "belief_states": [ @@ -12395,6 +12377,7 @@ "hotel" ], "values": [ + "warkworth house", "hotel" ], "belief_states": [ @@ -12414,6 +12397,7 @@ "3", "5", "thursday", + "warkworth house", "hotel" ], "belief_states": [ @@ -12437,8 +12421,8 @@ "values": [ "3", "5", - "thursday", "friday", + "warkworth house", "hotel" ], "belief_states": [ @@ -12464,8 +12448,8 @@ "values": [ "3", "5", - "thursday", "friday", + "warkworth house", "hotel" ], "belief_states": [ @@ -12525,8 +12509,8 @@ "values": [ "cheap", "7", - "tuesday", "11:45", + "tuesday", "spanish", "centre" ], @@ -12556,8 +12540,8 @@ "values": [ "cheap", "7", - "tuesday", "11:45", + "tuesday", "spanish", "centre" ], @@ -12589,8 +12573,8 @@ "values": [ "cheap", "7", - "tuesday", "11:45", + "tuesday", "spanish", "centre" ], @@ -12800,8 +12784,8 @@ "train" ], "values": [ - "friday", 
- "19:00" + "19:00", + "friday" ], "belief_states": [ "day = friday", @@ -12818,8 +12802,8 @@ "train" ], "values": [ - "friday", "19:00", + "friday", "cambridge" ], "belief_states": [ @@ -12840,8 +12824,8 @@ "train" ], "values": [ - "friday", "19:00", + "friday", "cambridge", "stansted airport" ], @@ -12866,8 +12850,8 @@ "train" ], "values": [ - "friday", "19:00", + "friday", "cambridge", "stansted airport" ], @@ -12894,9 +12878,9 @@ "train" ], "values": [ - "friday", "19:00", "5", + "friday", "cambridge", "stansted airport" ], @@ -12926,9 +12910,9 @@ "train" ], "values": [ - "friday", "19:00", "5", + "friday", "cambridge", "stansted airport" ], @@ -13327,9 +13311,9 @@ "train" ], "values": [ - "saturday", "20:20", "close", + "saturday", "london kings cross", "cambridge" ], @@ -13352,10 +13336,10 @@ "train" ], "values": [ - "saturday", "20:20", "close", "7", + "saturday", "london kings cross", "cambridge" ], @@ -13382,10 +13366,10 @@ "train" ], "values": [ - "saturday", "20:20", "close", "7", + "saturday", "london kings cross", "cambridge" ], @@ -13596,8 +13580,8 @@ "train" ], "values": [ - "sunday", "20:30", + "sunday", "cambridge", "broxbourne" ], @@ -13620,8 +13604,8 @@ "train" ], "values": [ - "sunday", "20:30", + "sunday", "cambridge", "broxbourne" ], @@ -13647,8 +13631,8 @@ "train" ], "values": [ - "sunday", "20:30", + "sunday", "cambridge", "broxbourne" ], @@ -13688,9 +13672,9 @@ "values": [ "moderate", "fabulous", - "sunday", "18:30", "6", + "sunday", "the oak bistro", "british" ], @@ -13718,9 +13702,9 @@ "values": [ "moderate", "fabulous", - "sunday", "18:30", "6", + "sunday", "the oak bistro", "british" ], @@ -13856,8 +13840,8 @@ "1", "8", "3", - "thursday", "forward", + "thursday", "cambridge", "guesthouse" ], @@ -13894,8 +13878,8 @@ "1", "8", "3", - "thursday", "forward", + "thursday", "cambridge", "guesthouse" ], @@ -14038,8 +14022,8 @@ ], "values": [ "13:30", - "tuesday", "8", + "tuesday", "cambridge", "london liverpool street" ], @@ -14066,8 +14050,8 
@@ ], "values": [ "13:30", - "tuesday", "8", + "tuesday", "cambridge", "london liverpool street" ], @@ -14174,8 +14158,8 @@ "traditional", "2", "12:00", - "wednesday", "11:00", + "wednesday", "centre", "lebanese" ], @@ -14208,8 +14192,8 @@ "traditional", "2", "12:00", - "wednesday", "11:00", + "wednesday", "centre", "lebanese" ], @@ -14475,8 +14459,8 @@ ], "values": [ "4", - "saturday", "3", + "saturday", "centre", "guesthouse" ], @@ -14508,8 +14492,8 @@ ], "values": [ "4", - "saturday", "3", + "saturday", "centre", "guesthouse" ], @@ -14543,9 +14527,9 @@ ], "values": [ "4", - "saturday", "3", "1", + "saturday", "centre", "guesthouse" ], @@ -14581,9 +14565,9 @@ ], "values": [ "4", - "saturday", "3", "1", + "saturday", "centre", "hotel" ], @@ -14621,9 +14605,9 @@ ], "values": [ "4", - "saturday", "3", "1", + "saturday", "centre", "hotel" ], @@ -14663,10 +14647,10 @@ ], "values": [ "4", - "saturday", "3", "1", "5", + "saturday", "centre", "hotel" ], @@ -14708,10 +14692,10 @@ ], "values": [ "4", - "saturday", "3", "1", "5", + "saturday", "centre", "hotel" ], @@ -14898,8 +14882,8 @@ "moderate", "free", "3", - "friday", "1", + "friday", "guesthouse", "north" ], @@ -14936,8 +14920,8 @@ "moderate", "free", "3", - "friday", "1", + "friday", "guesthouse", "north" ], @@ -15054,9 +15038,9 @@ ], "values": [ "moderate", - "monday", "5", "12:15", + "monday", "east", "italian" ], @@ -15088,9 +15072,9 @@ ], "values": [ "moderate", - "monday", "5", "12:15", + "monday", "east", "italian" ], @@ -15157,9 +15141,8 @@ "1", "cheap", "2", - "tuesday", - "wed", "5", + "wed", "centre" ], "belief_states": [ @@ -15190,9 +15173,8 @@ "1", "cheap", "2", - "tuesday", - "wed", "5", + "wed", "centre" ], "belief_states": [ @@ -15431,8 +15413,8 @@ ], "values": [ "08:15", - "monday", "ready", + "monday", "cambridge", "broxbourne" ], @@ -15565,7 +15547,6 @@ "free", "1", "5", - "friday", "thursday", "hotel", "centre" @@ -15601,8 +15582,6 @@ "free", "1", "5", - "friday", - "thursday", "saturday", 
"hotel", "centre" @@ -15640,8 +15619,6 @@ "free", "1", "5", - "friday", - "thursday", "saturday", "hotel", "centre" @@ -15681,8 +15658,6 @@ "free", "1", "5", - "friday", - "thursday", "saturday", "hotel", "centre" @@ -15816,8 +15791,8 @@ "values": [ "expensive", "2", - "sunday", "11:15", + "sunday", "italian", "south" ], @@ -15847,8 +15822,8 @@ "values": [ "expensive", "2", - "sunday", "11:15", + "sunday", "italian", "south" ], @@ -16087,9 +16062,8 @@ "moderate", "free", "3", - "thursday", - "wed", "4", + "wed", "alpha-milton guest house", "guesthouse", "north" @@ -16130,9 +16104,8 @@ "moderate", "free", "3", - "thursday", - "wed", "4", + "wed", "alpha-milton guest house", "guesthouse", "north" @@ -16283,8 +16256,8 @@ "free", "4", "1", - "tuesday", "6", + "tuesday", "guesthouse" ], "belief_states": [ @@ -16315,8 +16288,8 @@ "free", "4", "1", - "tuesday", "6", + "tuesday", "guesthouse" ], "belief_states": [ @@ -16349,8 +16322,8 @@ "free", "4", "1", - "tuesday", "6", + "tuesday", "guesthouse" ], "belief_states": [ @@ -16493,8 +16466,8 @@ "train" ], "values": [ - "saturday", "19:30", + "saturday", "cambridge" ], "belief_states": [ @@ -16515,9 +16488,9 @@ "train" ], "values": [ - "saturday", "19:30", "6", + "saturday", "cambridge", "broxbourne" ], @@ -16543,9 +16516,9 @@ "train" ], "values": [ - "saturday", "19:30", "6", + "saturday", "cambridge", "broxbourne" ], @@ -16583,8 +16556,8 @@ "train" ], "values": [ - "thursday", "20:15", + "thursday", "cambridge", "bishops stortford" ], @@ -16607,8 +16580,8 @@ "train" ], "values": [ - "thursday", "20:15", + "thursday", "cambridge", "bishops stortford" ], @@ -16633,9 +16606,9 @@ "train" ], "values": [ - "thursday", "20:15", "7", + "thursday", "cambridge", "bishops stortford" ], @@ -16663,9 +16636,9 @@ "train" ], "values": [ - "thursday", "20:15", "7", + "thursday", "cambridge", "bishops stortford" ], @@ -16809,8 +16782,8 @@ "cheap", "1", "4", - "wednesday", "18:30", + "wednesday", "the kohinoor", "centre", "indian" @@ 
-16844,9 +16817,9 @@ "cheap", "1", "4", - "wednesday", "18:30", "17:30", + "wednesday", "the kohinoor", "centre", "indian" @@ -17397,8 +17370,8 @@ "values": [ "6", "16:15", - "saturday", "15:15", + "saturday", "italian", "east" ], @@ -17431,8 +17404,8 @@ "values": [ "6", "16:15", - "saturday", "15:15", + "saturday", "italian", "east" ], @@ -17472,8 +17445,8 @@ "train" ], "values": [ - "saturday", "11:30", + "saturday", "cambridge", "birmingham new street" ], @@ -17611,8 +17584,8 @@ "train" ], "values": [ - "saturday", "13:45", + "saturday", "cambridge", "leicester" ], @@ -17637,8 +17610,8 @@ "train" ], "values": [ - "saturday", "13:45", + "saturday", "cambridge", "leicester" ], @@ -17665,8 +17638,8 @@ "train" ], "values": [ - "saturday", "13:45", + "saturday", "cambridge", "leicester" ], @@ -17695,8 +17668,8 @@ "train" ], "values": [ - "saturday", "13:45", + "saturday", "cambridge", "leicester" ], @@ -17939,9 +17912,8 @@ "restaurant" ], "values": [ - "modern", "moderate", - "european" + "modern european" ], "belief_states": [ "food = modern european", @@ -17958,11 +17930,10 @@ "restaurant" ], "values": [ - "modern", "moderate", "matter", "1", - "european", + "modern european", "centre" ], "belief_states": [ @@ -17983,15 +17954,14 @@ "restaurant" ], "values": [ - "modern", "moderate", "matter", "1", "6", "19:15", "saturday", - "european", "de luca cucina and bar", + "modern european", "centre" ], "belief_states": [ @@ -18018,15 +17988,14 @@ "restaurant" ], "values": [ - "modern", "moderate", "matter", "1", "6", "19:15", "saturday", - "european", "de luca cucina and bar", + "modern european", "centre" ], "belief_states": [ @@ -18212,8 +18181,8 @@ ], "values": [ "3", - "monday", "7", + "monday", "hotel" ], "belief_states": [ @@ -18242,9 +18211,9 @@ ], "values": [ "3", - "monday", "7", "1", + "monday", "hotel" ], "belief_states": [ @@ -18275,9 +18244,9 @@ ], "values": [ "3", - "monday", "7", "1", + "monday", "hotel" ], "belief_states": [ @@ -18378,7 +18347,6 @@ "free", 
"1", "5", - "thursday", "friday", "east" ], @@ -18413,7 +18381,6 @@ "free", "1", "5", - "thursday", "friday", "east" ], @@ -18436,8 +18403,8 @@ "train" ], "values": [ - "wednesday", - "12:30" + "12:30", + "wednesday" ], "belief_states": [ "day = wednesday", @@ -18454,8 +18421,8 @@ "train" ], "values": [ - "wednesday", "12:30", + "wednesday", "cambridge", "london liverpool street" ], @@ -18478,9 +18445,9 @@ "train" ], "values": [ - "wednesday", "12:30", "7", + "wednesday", "cambridge", "london liverpool street" ], @@ -18506,9 +18473,9 @@ "train" ], "values": [ - "wednesday", "12:30", "7", + "wednesday", "cambridge", "london liverpool street" ], @@ -18697,8 +18664,8 @@ "free", "cheap", "3", - "sunday", "nearby", + "sunday", "hotel" ], "belief_states": [ @@ -18736,9 +18703,9 @@ "free", "cheap", "3", - "sunday", "nearby", "4", + "sunday", "hotel" ], "belief_states": [ @@ -18779,9 +18746,9 @@ "free", "cheap", "3", - "sunday", "nearby", "4", + "sunday", "hotel" ], "belief_states": [ @@ -18824,9 +18791,9 @@ "free", "cheap", "3", - "sunday", "nearby", "4", + "sunday", "hotel" ], "belief_states": [ @@ -19111,7 +19078,7 @@ "saint catherines college" ], "belief_states": [ - "destination = saint catharines college", + "destination = saint catherines college", "departure = a and b guest house" ] }, @@ -19130,7 +19097,7 @@ "saint catherines college" ], "belief_states": [ - "destination = saint catharines college", + "destination = saint catherines college", "departure = a and b guest house", "arrive = 06:00" ] @@ -19152,7 +19119,7 @@ "saint catherines college" ], "belief_states": [ - "destination = saint catharines college", + "destination = saint catherines college", "departure = a and b guest house", "arrive = 06:00" ] @@ -20008,8 +19975,8 @@ "values": [ "4", "3", - "friday", "8", + "friday", "guesthouse", "south" ], @@ -20040,8 +20007,8 @@ "values": [ "4", "3", - "friday", "8", + "friday", "guesthouse", "south" ], @@ -20177,8 +20144,8 @@ ], "values": [ "expensive", - 
"saturday", "15:00", + "saturday", "centre", "mediterranean" ], @@ -20212,8 +20179,8 @@ ], "values": [ "expensive", - "saturday", "15:00", + "saturday", "centre", "mediterranean" ], @@ -20249,8 +20216,8 @@ ], "values": [ "expensive", - "saturday", "15:00", + "saturday", "centre", "mediterranean" ], @@ -20516,8 +20483,8 @@ "train" ], "values": [ - "sunday", "15:00", + "sunday", "cambridge", "birmingham new street" ], @@ -20542,8 +20509,8 @@ "train" ], "values": [ - "sunday", "15:00", + "sunday", "cambridge", "birmingham new street" ], @@ -20570,8 +20537,8 @@ "train" ], "values": [ - "sunday", "15:00", + "sunday", "cambridge", "birmingham new street" ], @@ -20809,7 +20776,7 @@ "taxi" ], "values": [ - "gandhi" + "the gandhi" ], "belief_states": [ "departure = the gandhi" @@ -20826,7 +20793,7 @@ ], "values": [ "02:00", - "gandhi", + "the gandhi", "broxbourne train station" ], "belief_states": [ @@ -20848,7 +20815,7 @@ ], "values": [ "02:00", - "gandhi", + "the gandhi", "broxbourne train station" ], "belief_states": [ @@ -20872,7 +20839,7 @@ ], "values": [ "02:00", - "gandhi", + "the gandhi", "broxbourne train station" ], "belief_states": [ diff --git a/data/prompt-learning/valid/valid.soloist.json b/data/prompt-learning/valid/valid.soloist.json index 69fbc06..d5e11b8 100644 --- a/data/prompt-learning/valid/valid.soloist.json +++ b/data/prompt-learning/valid/valid.soloist.json @@ -77,8 +77,8 @@ "free", "3", "2", - "monday", "excited", + "monday", "guesthouse" ], "belief_states": [ @@ -464,8 +464,8 @@ "expensive", "pricey", "7", - "monday", "19:15", + "monday", "north" ], "belief_states": [ @@ -493,8 +493,8 @@ "expensive", "pricey", "7", - "monday", "19:15", + "monday", "north" ], "belief_states": [ @@ -524,8 +524,8 @@ "expensive", "pricey", "7", - "monday", "19:15", + "monday", "north" ], "belief_states": [ @@ -998,8 +998,8 @@ "moderate", "1", "20:00", - "tuesday", "19:00", + "tuesday", "turkish", "centre" ], @@ -1037,8 +1037,8 @@ "moderate", "1", "20:00", - "tuesday", 
"19:00", + "tuesday", "turkish", "centre" ], @@ -1078,8 +1078,8 @@ "moderate", "1", "20:00", - "tuesday", "19:00", + "tuesday", "turkish", "centre" ], @@ -1118,8 +1118,8 @@ "train" ], "values": [ - "tuesday", "11:15", + "tuesday", "leicester" ], "belief_states": [ @@ -1140,8 +1140,8 @@ "train" ], "values": [ - "tuesday", "11:15", + "tuesday", "leicester", "cambridge" ], @@ -1166,9 +1166,9 @@ "train" ], "values": [ - "tuesday", "11:15", "4", + "tuesday", "leicester", "cambridge" ], @@ -1196,9 +1196,9 @@ "train" ], "values": [ - "tuesday", "11:15", "4", + "tuesday", "leicester", "cambridge" ], @@ -1256,8 +1256,8 @@ "train" ], "values": [ - "friday", "20:45", + "friday", "peterborough" ], "belief_states": [ @@ -1281,8 +1281,8 @@ "train" ], "values": [ - "friday", "20:45", + "friday", "peterborough" ], "belief_states": [ @@ -1308,8 +1308,8 @@ "train" ], "values": [ - "friday", "20:45", + "friday", "peterborough" ], "belief_states": [ @@ -1337,8 +1337,8 @@ "train" ], "values": [ - "friday", "20:45", + "friday", "peterborough" ], "belief_states": [ @@ -1672,8 +1672,8 @@ "values": [ "6", "5", - "saturday", "1", + "saturday", "huntingdon marriott hotel" ], "belief_states": [ @@ -1699,8 +1699,8 @@ "values": [ "6", "5", - "saturday", "1", + "saturday", "huntingdon marriott hotel" ], "belief_states": [ @@ -1839,8 +1839,8 @@ "values": [ "expensive", "4", - "thursday", "16:45", + "thursday", "graffiti", "british" ], @@ -1867,8 +1867,8 @@ "values": [ "expensive", "4", - "thursday", "16:45", + "thursday", "graffiti", "british" ], @@ -2191,9 +2191,8 @@ "train" ], "values": [ - "saturday", - "wed", "12:45", + "wed", "cambridge", "peterborough" ], @@ -2216,9 +2215,8 @@ "train" ], "values": [ - "saturday", - "wed", "12:45", + "wed", "cambridge", "peterborough" ], @@ -2243,10 +2241,9 @@ "train" ], "values": [ - "saturday", - "wed", "12:45", "7", + "wed", "cambridge", "peterborough" ], @@ -2275,10 +2272,9 @@ "train" ], "values": [ - "saturday", - "wed", "12:45", "7", + "wed", 
"cambridge", "peterborough" ], @@ -2316,8 +2312,8 @@ "train" ], "values": [ - "saturday", "12:00", + "saturday", "cambridge" ], "belief_states": [ @@ -2339,8 +2335,8 @@ "train" ], "values": [ - "saturday", "12:00", + "saturday", "cambridge" ], "belief_states": [ @@ -2364,8 +2360,8 @@ "train" ], "values": [ - "saturday", "12:00", + "saturday", "cambridge" ], "belief_states": [ @@ -2784,8 +2780,8 @@ "train" ], "values": [ - "tuesday", "20:30", + "tuesday", "stevenage" ], "belief_states": [ @@ -2808,8 +2804,8 @@ "train" ], "values": [ - "tuesday", "20:30", + "tuesday", "stevenage" ], "belief_states": [ @@ -2835,8 +2831,8 @@ "train" ], "values": [ - "tuesday", "20:30", + "tuesday", "stevenage" ], "belief_states": [ @@ -2864,8 +2860,8 @@ "train" ], "values": [ - "tuesday", "20:30", + "tuesday", "stevenage" ], "belief_states": [ @@ -3493,8 +3489,8 @@ "train" ], "values": [ - "wednesday", "13:30", + "wednesday", "london kings cross" ], "belief_states": [ @@ -3515,8 +3511,8 @@ "train" ], "values": [ - "wednesday", "13:30", + "wednesday", "london kings cross" ], "belief_states": [ @@ -3539,8 +3535,8 @@ "train" ], "values": [ - "wednesday", "13:30", + "wednesday", "london kings cross" ], "belief_states": [ @@ -3565,8 +3561,8 @@ "train" ], "values": [ - "wednesday", "13:30", + "wednesday", "london kings cross" ], "belief_states": [ @@ -3593,8 +3589,8 @@ "train" ], "values": [ - "wednesday", "13:30", + "wednesday", "london kings cross" ], "belief_states": [ @@ -3953,8 +3949,7 @@ "restaurant" ], "values": [ - "modern", - "european", + "modern european", "centre" ], "belief_states": [ @@ -3972,9 +3967,8 @@ "restaurant" ], "values": [ - "modern", "moderate", - "european", + "modern european", "centre" ], "belief_states": [ @@ -3995,12 +3989,11 @@ "restaurant" ], "values": [ - "modern", "moderate", "1", "17:45", "monday", - "european", + "modern european", "centre" ], "belief_states": [ @@ -4027,14 +4020,13 @@ "restaurant" ], "values": [ - "modern", "moderate", "1", "17:45", - 
"monday", "16:45", - "european", + "monday", "table for", + "modern european", "centre" ], "belief_states": [ @@ -4063,15 +4055,14 @@ "restaurant" ], "values": [ - "modern", "moderate", "1", "17:45", - "monday", "16:45", "fantastic", - "european", + "monday", "table for", + "modern european", "centre" ], "belief_states": [ @@ -4423,8 +4414,8 @@ ], "values": [ "15:30", - "monday", "8", + "monday", "cambridge", "stevenage" ], @@ -4450,8 +4441,8 @@ ], "values": [ "15:30", - "monday", "8", + "monday", "cambridge", "stevenage" ], @@ -4480,8 +4471,8 @@ ], "values": [ "15:30", - "monday", "8", + "monday", "cambridge", "stevenage" ], @@ -4772,9 +4763,9 @@ "free", "awesome", "1", - "friday", "5", "6", + "friday", "guesthouse" ], "belief_states": [ @@ -4807,10 +4798,10 @@ "free", "awesome", "1", - "friday", "5", "6", "excellent", + "friday", "guesthouse" ], "belief_states": [ @@ -5094,8 +5085,8 @@ "free", "2", "moderate", - "wednesday", "3", + "wednesday", "ashley house", "hotel" ], @@ -5130,8 +5121,8 @@ "free", "2", "moderate", - "wednesday", "3", + "wednesday", "ashley house", "hotel" ], @@ -6003,9 +5994,9 @@ "restaurant" ], "values": [ - "friday", "4", "11:00", + "friday", "panahar" ], "belief_states": [ @@ -6029,10 +6020,10 @@ "restaurant" ], "values": [ - "friday", "4", "11:00", "10:00", + "friday", "panahar" ], "belief_states": [ @@ -6058,10 +6049,10 @@ "restaurant" ], "values": [ - "friday", "4", "11:00", "10:00", + "friday", "panahar" ], "belief_states": [ @@ -6496,8 +6487,8 @@ ], "values": [ "14:00", - "tuesday", "2", + "tuesday", "cambridge", "bishops stortford" ], @@ -6532,8 +6523,8 @@ ], "values": [ "14:00", - "tuesday", "2", + "tuesday", "cambridge", "bishops stortford" ], @@ -6570,8 +6561,8 @@ ], "values": [ "14:00", - "tuesday", "2", + "tuesday", "cambridge", "bishops stortford" ], @@ -6591,8 +6582,8 @@ "train" ], "values": [ - "monday", - "16:45" + "16:45", + "monday" ], "belief_states": [ "leave = 16:45", @@ -6609,8 +6600,8 @@ "train" ], "values": [ - 
"monday", "16:45", + "monday", "stevenage", "cambridge" ], @@ -6633,9 +6624,9 @@ "train" ], "values": [ - "monday", "16:45", "7", + "monday", "stevenage", "cambridge" ], @@ -6661,9 +6652,9 @@ "train" ], "values": [ - "monday", "16:45", "7", + "monday", "stevenage", "cambridge" ], @@ -6691,9 +6682,9 @@ "train" ], "values": [ - "monday", "16:45", "7", + "monday", "stevenage", "cambridge" ], @@ -6723,9 +6714,9 @@ "train" ], "values": [ - "monday", "16:45", "7", + "monday", "stevenage", "cambridge" ], @@ -6879,8 +6870,8 @@ "cheap", "1", "5", - "thursday", "3", + "thursday", "hotel", "south" ], @@ -6914,8 +6905,8 @@ "cheap", "1", "5", - "thursday", "3", + "thursday", "hotel", "south" ], @@ -7362,8 +7353,8 @@ "awesome", "3", "19:00", - "wednesday", "18:00", + "wednesday", "chinese", "centre" ], @@ -7404,8 +7395,8 @@ "awesome", "3", "19:00", - "wednesday", "18:00", + "wednesday", "chinese", "centre" ], @@ -7444,8 +7435,8 @@ "train" ], "values": [ - "tuesday", "13:30", + "tuesday", "norwich", "cambridge" ], @@ -7468,8 +7459,8 @@ "train" ], "values": [ - "tuesday", "13:30", + "tuesday", "norwich", "cambridge" ], @@ -7494,8 +7485,8 @@ "train" ], "values": [ - "tuesday", "13:30", + "tuesday", "norwich", "cambridge" ], @@ -7522,9 +7513,9 @@ "train" ], "values": [ - "tuesday", "13:30", "total", + "tuesday", "norwich", "cambridge" ], @@ -7553,9 +7544,9 @@ "train" ], "values": [ - "tuesday", "13:30", "total", + "tuesday", "norwich", "cambridge" ], @@ -7794,8 +7785,8 @@ "values": [ "8", "4", - "monday", "1", + "monday", "hotel" ], "belief_states": [ @@ -7822,8 +7813,8 @@ "values": [ "8", "4", - "monday", "1", + "monday", "hotel" ], "belief_states": [ @@ -7842,8 +7833,8 @@ "train" ], "values": [ - "friday", - "21:45" + "21:45", + "friday" ], "belief_states": [ "leave = 21:45", @@ -7860,8 +7851,8 @@ "train" ], "values": [ - "friday", "21:45", + "friday", "cambridge", "stansted airport" ], @@ -7884,8 +7875,8 @@ "train" ], "values": [ - "friday", "21:45", + "friday", "cambridge", 
"stansted airport" ], @@ -7910,8 +7901,8 @@ "train" ], "values": [ - "friday", "21:45", + "friday", "cambridge", "stansted airport" ], @@ -7938,8 +7929,8 @@ "train" ], "values": [ - "friday", "21:45", + "friday", "cambridge", "stansted airport" ], @@ -7968,8 +7959,8 @@ "train" ], "values": [ - "friday", "21:45", + "friday", "cambridge", "stansted airport" ], @@ -8338,8 +8329,8 @@ "train" ], "values": [ - "sunday", - "20:45" + "20:45", + "sunday" ], "belief_states": [ "day = sunday", @@ -8356,8 +8347,8 @@ "train" ], "values": [ - "sunday", "20:45", + "sunday", "leicester", "cambridge" ], @@ -8380,8 +8371,8 @@ "train" ], "values": [ - "sunday", "20:45", + "sunday", "leicester", "cambridge" ], @@ -8406,8 +8397,8 @@ "train" ], "values": [ - "sunday", "20:45", + "sunday", "leicester", "cambridge" ], @@ -8434,8 +8425,8 @@ "train" ], "values": [ - "sunday", "20:45", + "sunday", "leicester", "cambridge" ], @@ -8649,8 +8640,8 @@ "train" ], "values": [ - "thursday", "08:15", + "thursday", "cambridge", "kings lynn" ], @@ -8675,8 +8666,8 @@ "train" ], "values": [ - "thursday", "08:15", + "thursday", "cambridge", "kings lynn" ], @@ -8703,9 +8694,9 @@ "train" ], "values": [ - "thursday", "08:15", "confusing", + "thursday", "cambridge", "kings lynn" ], @@ -8734,9 +8725,9 @@ "train" ], "values": [ - "thursday", "08:15", "confusing", + "thursday", "cambridge", "kings lynn" ], @@ -9251,8 +9242,8 @@ "train" ], "values": [ - "monday", "11:45", + "monday", "cambridge", "stansted airport" ], @@ -9275,9 +9266,9 @@ "train" ], "values": [ - "monday", "11:45", "5", + "monday", "cambridge", "stansted airport" ], @@ -9303,9 +9294,9 @@ "train" ], "values": [ - "monday", "11:45", "5", + "monday", "cambridge", "stansted airport" ], @@ -9333,9 +9324,9 @@ "train" ], "values": [ - "monday", "11:45", "5", + "monday", "cambridge", "stansted airport" ], @@ -9710,8 +9701,8 @@ "cheap", "1", "15:30", - "monday", "14:30", + "monday", "nandos", "portuguese", "south" @@ -9745,8 +9736,8 @@ "cheap", "1", 
"15:30", - "monday", "14:30", + "monday", "nandos", "portuguese", "south" @@ -9893,9 +9884,7 @@ "restaurant" ], "values": [ - "modern", - "european", - "northern european" + "modern european" ], "belief_states": [ "food = modern european" @@ -9913,10 +9902,8 @@ "restaurant" ], "values": [ - "modern", "moderate", - "european", - "northern european" + "modern european" ], "belief_states": [ "food = modern european", @@ -9938,10 +9925,8 @@ "restaurant" ], "values": [ - "modern", "moderate", - "european", - "northern european" + "modern european" ], "belief_states": [ "food = modern european", @@ -9965,11 +9950,9 @@ "restaurant" ], "values": [ - "modern", "moderate", - "european", "de luca cucina", - "northern european" + "modern european" ], "belief_states": [ "food = modern european", @@ -9996,11 +9979,9 @@ "restaurant" ], "values": [ - "modern", "moderate", - "european", "de luca cucina", - "northern european" + "modern european" ], "belief_states": [ "food = modern european", @@ -10289,9 +10270,9 @@ "cheap", "4", "free", - "tuesday", "5", "2", + "tuesday", "aloha", "hotel" ], @@ -10324,9 +10305,9 @@ "cheap", "4", "free", - "tuesday", "5", "2", + "tuesday", "aloha", "hotel" ], @@ -10929,9 +10910,9 @@ "4", "moderate", "ahead", - "tuesday", "5", "7", + "tuesday", "guesthouse" ], "belief_states": [ @@ -10963,9 +10944,9 @@ "4", "moderate", "ahead", - "tuesday", "5", "7", + "tuesday", "guesthouse" ], "belief_states": [ @@ -11192,8 +11173,8 @@ "1", "3", "12:15", - "friday", "11:15", + "friday", "portuguese", "south" ], @@ -11229,8 +11210,8 @@ "1", "3", "12:15", - "friday", "11:15", + "friday", "portuguese", "south" ], @@ -11362,8 +11343,8 @@ "moderate", "8", "14:15", - "friday", "13:15", + "friday", "british", "centre" ], @@ -11398,8 +11379,8 @@ "moderate", "8", "14:15", - "friday", "13:15", + "friday", "british", "centre" ], @@ -11421,7 +11402,7 @@ "hotel" ], "values": [ - "city centre north" + "centre north" ], "belief_states": [ "name = city centre north b and b" @@ 
-11437,7 +11418,7 @@ "hotel" ], "values": [ - "city centre north" + "centre north" ], "belief_states": [ "name = city centre north b and b" @@ -11457,7 +11438,7 @@ "values": [ "2", "monday", - "city centre north" + "centre north" ], "belief_states": [ "name = city centre north b and b" @@ -11478,9 +11459,9 @@ ], "values": [ "2", - "monday", "4", - "city centre north" + "monday", + "centre north" ], "belief_states": [ "name = city centre north b and b", @@ -11506,9 +11487,9 @@ ], "values": [ "2", - "monday", "4", - "city centre north" + "monday", + "centre north" ], "belief_states": [ "name = city centre north b and b", @@ -11620,8 +11601,8 @@ "overdue", "free", "4", - "monday", "2", + "monday", "guesthouse", "east" ], @@ -11657,8 +11638,8 @@ "overdue", "free", "4", - "monday", "2", + "monday", "guesthouse", "east" ], @@ -11697,8 +11678,8 @@ "overdue", "free", "4", - "monday", "2", + "monday", "guesthouse", "east" ], @@ -12078,9 +12059,9 @@ ], "values": [ "14:30", - "thursday", "10", "7", + "thursday", "london kings cross", "cambridge" ], @@ -12110,9 +12091,9 @@ ], "values": [ "14:30", - "thursday", "10", "7", + "thursday", "london kings cross", "cambridge" ], @@ -12150,8 +12131,8 @@ "train" ], "values": [ - "sunday", "16:30", + "sunday", "cambridge" ], "belief_states": [ @@ -12173,9 +12154,9 @@ "train" ], "values": [ - "sunday", "16:30", "4", + "sunday", "cambridge" ], "belief_states": [ @@ -12200,9 +12181,9 @@ "train" ], "values": [ - "sunday", "16:30", "4", + "sunday", "cambridge" ], "belief_states": [ @@ -12793,8 +12774,8 @@ "values": [ "7", "3", - "thursday", "11:00", + "thursday", "a and b guest house" ], "belief_states": [ @@ -12821,8 +12802,8 @@ "values": [ "7", "3", - "thursday", "11:00", + "thursday", "a and b guest house" ], "belief_states": [ @@ -12979,9 +12960,9 @@ "values": [ "expensive", "1", - "saturday", "19:45", "3", + "saturday", "centre", "indian" ], @@ -13013,9 +12994,9 @@ "values": [ "expensive", "1", - "saturday", "19:45", "3", + "saturday", 
"centre", "indian" ], @@ -13049,9 +13030,9 @@ "values": [ "expensive", "1", - "saturday", "19:45", "3", + "saturday", "centre", "indian" ], @@ -13358,8 +13339,8 @@ "train" ], "values": [ - "sunday", - "13:30" + "13:30", + "sunday" ], "belief_states": [ "leave = 13:30", @@ -13376,8 +13357,8 @@ "train" ], "values": [ - "sunday", "13:30", + "sunday", "cambridge", "stevenage" ], @@ -13400,10 +13381,10 @@ "train" ], "values": [ - "sunday", "13:30", "shouldnt", "6", + "sunday", "cambridge", "stevenage" ], @@ -13429,10 +13410,10 @@ "train" ], "values": [ - "sunday", "13:30", "shouldnt", "6", + "sunday", "cambridge", "stevenage" ], @@ -13512,8 +13493,8 @@ "train" ], "values": [ - "thursday", "17:45", + "thursday", "leicester", "cambridge" ], @@ -13540,9 +13521,9 @@ "train" ], "values": [ - "thursday", "17:45", "total", + "thursday", "leicester", "cambridge" ], @@ -13571,9 +13552,9 @@ "train" ], "values": [ - "thursday", "17:45", "total", + "thursday", "leicester", "cambridge" ], @@ -13604,9 +13585,9 @@ "train" ], "values": [ - "thursday", "17:45", "total", + "thursday", "leicester", "cambridge" ], @@ -13668,8 +13649,8 @@ "cheap", "free", "6", - "thursday", "4", + "thursday", "east", "hotel" ], @@ -13699,8 +13680,8 @@ "cheap", "free", "6", - "thursday", "4", + "thursday", "east", "hotel" ], @@ -13738,8 +13719,8 @@ ], "values": [ "1", - "sunday", "3", + "sunday", "hotel" ], "belief_states": [ @@ -13762,9 +13743,9 @@ ], "values": [ "1", - "sunday", "3", "2", + "sunday", "hotel" ], "belief_states": [ @@ -13789,9 +13770,9 @@ ], "values": [ "1", - "sunday", "3", "2", + "sunday", "hotel" ], "belief_states": [ @@ -13849,8 +13830,8 @@ "train" ], "values": [ - "sunday", "10:15", + "sunday", "cambridge", "leicester" ], @@ -13875,11 +13856,11 @@ "train" ], "values": [ - "sunday", "10:15", "interested", "1", "ready", + "sunday", "cambridge", "leicester" ], @@ -14029,8 +14010,8 @@ "2", "4", "8", - "saturday", "5", + "saturday", "guesthouse" ], "belief_states": [ @@ -14067,10 +14048,10 
@@ "2", "4", "8", - "saturday", "5", "happy", "relieved", + "saturday", "guesthouse" ], "belief_states": [ @@ -14164,8 +14145,8 @@ "expensive", "7", "11:30", - "thursday", "10:30", + "thursday", "italian", "centre" ], @@ -14198,8 +14179,8 @@ "expensive", "7", "11:30", - "thursday", "10:30", + "thursday", "italian", "centre" ], @@ -14468,8 +14449,8 @@ ], "values": [ "4", - "tuesday", "6", + "tuesday", "hotel" ], "belief_states": [ @@ -14492,8 +14473,8 @@ ], "values": [ "4", - "tuesday", "6", + "tuesday", "hotel" ], "belief_states": [ @@ -14551,8 +14532,8 @@ "train" ], "values": [ - "saturday", "08:15", + "saturday", "bishops stortford", "cambridge" ], @@ -14577,8 +14558,8 @@ "train" ], "values": [ - "saturday", "08:15", + "saturday", "bishops stortford", "cambridge" ], @@ -14605,8 +14586,8 @@ "train" ], "values": [ - "saturday", "08:15", + "saturday", "bishops stortford", "cambridge" ], @@ -15006,8 +14987,8 @@ ], "values": [ "09:30", - "monday", "terrific", + "monday", "leicester", "cambridge" ], @@ -15036,8 +15017,8 @@ ], "values": [ "09:30", - "monday", "terrific", + "monday", "leicester", "cambridge" ], @@ -15540,9 +15521,9 @@ "restaurant" ], "values": [ - "thursday", "12:45", "7", + "thursday", "thai", "west" ], @@ -15570,9 +15551,9 @@ "restaurant" ], "values": [ - "thursday", "12:45", "7", + "thursday", "thai", "west" ], @@ -15602,9 +15583,9 @@ "restaurant" ], "values": [ - "thursday", "12:45", "7", + "thursday", "thai", "west" ], @@ -15856,8 +15837,8 @@ "2", "free", "3", - "wednesday", "1", + "wednesday", "free parking & wifi", "hotel" ], @@ -15896,8 +15877,8 @@ "2", "free", "3", - "wednesday", "1", + "wednesday", "free parking & wifi", "hotel" ], @@ -16075,8 +16056,8 @@ "train" ], "values": [ - "sunday", "0930", + "sunday", "london kings cross", "cambridge" ], @@ -16099,9 +16080,9 @@ "train" ], "values": [ - "sunday", "0930", "3", + "sunday", "london kings cross", "cambridge" ], @@ -16127,9 +16108,9 @@ "train" ], "values": [ - "sunday", "0930", "3", + 
"sunday", "london kings cross", "cambridge" ], @@ -16212,8 +16193,8 @@ "values": [ "1", "5", - "monday", "15:15", + "monday", "chinese", "centre" ], @@ -16243,8 +16224,8 @@ "values": [ "1", "5", - "monday", "15:15", + "monday", "chinese", "centre" ], @@ -16299,8 +16280,8 @@ ], "values": [ "1", - "sunday", "19:45", + "sunday", "pizza hut cherry hinton" ], "belief_states": [ @@ -16325,8 +16306,8 @@ ], "values": [ "1", - "sunday", "19:45", + "sunday", "pizza hut cherry hinton" ], "belief_states": [ @@ -17100,8 +17081,8 @@ "expensive", "1", "17:30", - "sunday", "16:30", + "sunday", "cote", "french" ], @@ -17133,9 +17114,9 @@ "expensive", "1", "17:30", - "sunday", "16:30", "excellent", + "sunday", "cote", "french" ], @@ -17169,9 +17150,9 @@ "expensive", "1", "17:30", - "sunday", "16:30", "excellent", + "sunday", "cote", "french" ], @@ -17424,9 +17405,9 @@ ], "values": [ "12:15", - "sunday", "5", "13:09", + "sunday", "leicester", "cambridge" ], @@ -17455,10 +17436,10 @@ ], "values": [ "12:15", - "sunday", "5", "13:09", "awesome", + "sunday", "leicester", "cambridge" ], @@ -17571,8 +17552,8 @@ "values": [ "4", "16:15", - "wednesday", "15:15", + "wednesday", "south", "indian" ], @@ -17605,8 +17586,8 @@ "values": [ "4", "16:15", - "wednesday", "15:15", + "wednesday", "south", "indian" ], @@ -17831,8 +17812,8 @@ "moderate", "8", "16:30", - "wednesday", "15:30", + "wednesday", "centre", "chinese" ], @@ -17867,8 +17848,8 @@ "moderate", "8", "16:30", - "wednesday", "15:30", + "wednesday", "centre", "chinese" ], @@ -18269,8 +18250,8 @@ "train" ], "values": [ - "sunday", "14:00", + "sunday", "london kings cross" ], "belief_states": [ @@ -18295,9 +18276,9 @@ "train" ], "values": [ - "sunday", "14:00", "pretty", + "sunday", "london kings cross" ], "belief_states": [ @@ -18325,9 +18306,9 @@ "train" ], "values": [ - "sunday", "14:00", "pretty", + "sunday", "london kings cross" ], "belief_states": [ @@ -18357,9 +18338,9 @@ "train" ], "values": [ - "sunday", "14:00", "pretty", + 
"sunday", "london kings cross" ], "belief_states": [ @@ -18417,8 +18398,8 @@ "train" ], "values": [ - "friday", "13:30", + "friday", "stevenage", "cambridge" ], @@ -18443,9 +18424,9 @@ "train" ], "values": [ - "friday", "13:30", "3", + "friday", "stevenage", "cambridge" ], @@ -18473,9 +18454,9 @@ "train" ], "values": [ - "friday", "13:30", "3", + "friday", "stevenage", "cambridge" ], @@ -18515,8 +18496,8 @@ "values": [ "expensive", "4", - "monday", "1300", + "monday", "chinese" ], "belief_states": [ @@ -18541,9 +18522,9 @@ "values": [ "expensive", "4", - "monday", "1300", "13:00", + "monday", "chinese" ], "belief_states": [ @@ -18571,7 +18552,6 @@ "values": [ "expensive", "4", - "monday", "1300", "13:00", "tuesday", @@ -18604,7 +18584,6 @@ "values": [ "expensive", "4", - "monday", "1300", "13:00", "tuesday", @@ -18747,9 +18726,8 @@ "values": [ "free", "2", - "saturday", - "wed", "4", + "wed", "guesthouse", "west" ], @@ -18779,9 +18757,8 @@ "values": [ "free", "2", - "saturday", - "wed", "4", + "wed", "guesthouse", "west" ], @@ -18813,9 +18790,8 @@ "values": [ "free", "2", - "saturday", - "wed", "4", + "wed", "guesthouse", "west" ], @@ -19607,7 +19583,6 @@ "4", "moderate", "3", - "sunday", "monday", "guesthouse" ], @@ -19645,7 +19620,6 @@ "4", "moderate", "3", - "sunday", "monday", "guesthouse" ], @@ -19765,8 +19739,8 @@ "expensive", "5", "11:15", - "wednesday", "10:15", + "wednesday", "centre", "chinese" ], @@ -19800,8 +19774,8 @@ "expensive", "5", "11:15", - "wednesday", "10:15", + "wednesday", "centre", "chinese" ], @@ -19837,8 +19811,8 @@ "expensive", "5", "11:15", - "wednesday", "10:15", + "wednesday", "centre", "chinese" ], @@ -19951,9 +19925,9 @@ "train" ], "values": [ - "thursday", "13:00", "5", + "thursday", "london kings cross", "cambridge" ], @@ -19979,9 +19953,9 @@ "train" ], "values": [ - "thursday", "13:00", "5", + "thursday", "london kings cross", "cambridge" ], @@ -20009,9 +19983,9 @@ "train" ], "values": [ - "thursday", "13:00", "5", + "thursday", 
"london kings cross", "cambridge" ], @@ -20155,8 +20129,8 @@ "values": [ "7", "1215", - "thursday", "11:15", + "thursday", "west", "thai" ], @@ -20191,8 +20165,8 @@ "values": [ "7", "1215", - "thursday", "11:15", + "thursday", "west", "thai" ], @@ -20229,8 +20203,8 @@ "values": [ "7", "1215", - "thursday", "11:15", + "thursday", "west", "thai" ], @@ -20398,8 +20372,8 @@ "expensive", "free", "moderate", - "tuesday", "5", + "tuesday", "guesthouse", "east" ], @@ -20432,8 +20406,8 @@ "expensive", "free", "moderate", - "tuesday", "5", + "tuesday", "guesthouse", "east" ], @@ -20636,8 +20610,8 @@ ], "values": [ "3", - "wednesday", "18:15", + "wednesday", "efes restaurant" ], "belief_states": [ @@ -20663,9 +20637,9 @@ ], "values": [ "3", - "wednesday", "18:15", "17:15", + "wednesday", "efes restaurant" ], "belief_states": [ @@ -20693,9 +20667,9 @@ ], "values": [ "3", - "wednesday", "18:15", "17:15", + "wednesday", "efes restaurant" ], "belief_states": [ diff --git a/prompt-learning/prompt_utils.py b/prompt-learning/prompt_utils.py index 87e589f..acefd9d 100644 --- a/prompt-learning/prompt_utils.py +++ b/prompt-learning/prompt_utils.py @@ -12,7 +12,7 @@ PROMPT_TEMPLATES = { "generate": "belief states: value = $value, slot =" }, "inverse-prompt": { - "training": "belief states: $slot = $value", + "training": "belief states: slot = $slot, value = $value", }, "prompt-ensemble": { "training": { diff --git a/prompt-learning/train_prompting.sh b/prompt-learning/train_prompting.sh index ab9c689..44d8bfe 100644 --- a/prompt-learning/train_prompting.sh +++ b/prompt-learning/train_prompting.sh @@ -46,15 +46,22 @@ datetime_now=$(date +"%Y%m%dT%H%M%S") experiment_folder="${data_split}"/experiment-${datetime_now} SAVE_DIR="${SAVED_MODELS_PROMPT}"/"${experiment_folder}" -echo "Trained Models (checkpoints/epochs) are saved in ${SAVE_DIR}" +echo "Trained Models (epochs) will be saved in ${SAVE_DIR}" + +# different number of epoch for different training sets +if [ "$data_split" = "5-dpd" ] 
|| [ "$data_split" = "10-dpd" ]; then + epochs=5 +else + epochs=8 +fi python prompt_train.py \ --save_model_dir="${SAVE_DIR}" \ --pretrained_model_path="${PRE_TRAINED_SOLOIST}" \ --train_data_file="${TRAIN_DATA_FILE}" \ --validation_file=../data/prompt-learning/valid/valid.soloist.json \ ---num_epochs 10 \ +--num_epochs $epochs \ --learning_rate 5e-5 \ +--with_prompt_ensemble \ --with_inverse_prompt \ ---inverse_prompt_weight 0.1 \ ---with_prompt_ensemble \ No newline at end of file +--inverse_prompt_weight 0.1 \ No newline at end of file diff --git a/utils/corenlp.py b/utils/corenlp.py index 7ff640f..8d2f92e 100644 --- a/utils/corenlp.py +++ b/utils/corenlp.py @@ -1 +1 @@ -import stanza import os from pathlib import Path from stanza.server import CoreNLPClient STOPWORDS_FILE = "../data/resource/stopwords.txt" CORENLP_DIR = str(Path().absolute()) + '/corenlp-dir' # properties for the CoreNLP Server ANNOTATORS = ['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'regexner'] # install/download the latest version of CoreNLP stanza.install_corenlp(dir=CORENLP_DIR) extra_model_jar = os.path.join(CORENLP_DIR, 'stanford-corenlp-4.5.1-models-english-extra.jar') if not os.path.isfile(extra_model_jar): # download corenlp english models stanza.download_corenlp_models(model='english-extra', version='4.5.1', dir=CORENLP_DIR) else: print('English Extra CoreNLP models available!') # set environment var of installation location os.environ["CORENLP_HOME"] = CORENLP_DIR VALUES_CONVERT = { 'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'not important': 'dont care', 'not expensive': 'cheap' } # corenlp properties properties = { 'ner.applyFineGrained': False, 'pos.model': 'edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger', 'ner.model': 'edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,' 
'edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz,' 'edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz', 'ner.additional.regexner.mapping': str(Path().absolute()) + '/regexner.rules' } DATE_TIME_NUM = {'TIME', 'DATE', 'NUMBER'} class ValueExtractor: def __init__(self): # create the CoreNLP client self.client = CoreNLPClient( annotators=ANNOTATORS, properties=properties, timeout=300000, memory='8G', threads=8, be_quiet=True ) # load the stopwords.txt file self.stopwords = set() with open(STOPWORDS_FILE, 'r') as file: for line in file: self.stopwords.add(line.strip()) def start(self): self.client.start() def stop(self): self.client.stop() def extract_value_candidates(self, history): # format history and prepare text for annotation user_utterance = '' for utterance in history: # consider only user utterances if utterance.startswith('user :'): utterance = utterance[len('user :'):] # if sentence doesn't end with punctuation, add . if not utterance.endswith(('.', '?', '!')): utterance = utterance + '.' 
# append all utterances user_utterance = user_utterance + utterance + ' ' if not user_utterance: return [] value_candidates = [] user_utterance = user_utterance.replace("'", "") # use corenlp client and annotate text annotation = self.client.annotate(user_utterance) for sent in annotation.sentence: prev_word = '' for token in sent.token: # TODO :: remove # print("{:12s}\t{:12s}\t{:6s}\t{}".format(token.word, token.lemma, token.pos, token.ner)) word = token.word.strip() # extract Adjectives & Adverbs using POS tags # exclude the custom entity types if token.pos in ['JJ', 'RB'] \ and token.ner not in ['AREA', 'PLACE', 'PLACE_TYPE', 'FOOD_TYPE']: # check if the word ends with 'ly' word = word.removesuffix('ly') if word not in self.stopwords and word not in value_candidates: if prev_word == 'not' and prev_word + ' ' + word in VALUES_CONVERT: word = VALUES_CONVERT[prev_word + ' ' + word] value_candidates.append(word) prev_word = word # extract day, time & numbers if token.ner in DATE_TIME_NUM: if word in VALUES_CONVERT: word = VALUES_CONVERT[word] if token.ner == "DATE" and token.pos not in ["NNP", "CD"]: continue if word not in self.stopwords: if token.ner == "TIME": if not word[0].isdigit(): continue if len(word) == 4: word = word.zfill(5) if word in value_candidates: continue value_candidates.append(word) entity_map = {} # extract named entities (place, area,...) 
for sent in annotation.sentence: for mention in sent.mentions: # TODO :: remove # print("{:30s}\t{}".format(mention.entityMentionText, mention.entityType)) entity_text = mention.entityMentionText.strip() if mention.entityType not in {'TIME', 'DATE', 'NUMBER', 'DURATION'} \ and entity_text not in value_candidates: if mention.entityType in ['AREA', 'FOOD_TYPE', 'PLACE_TYPE']: entity_map[mention.entityType] = entity_text else: value_candidates.append(entity_text) more_values = list(entity_map.values()) return value_candidates + more_values # sample test TODO:: remove # dialog_history = [ # "user : i need a taxi to go to mahal of cambridge", # "system : i can help with that . did you have a specific time in mind ?", # "user : yes, i'd like to leave after 2:30 please.", # "system : where would you like to depart from ?", # "user : i am departing from jesus green outdoor pool.", # "system : your taxi is booked . it is a [taxi_type] .", # "user : what is the contact number for the taxi?", # "system : the contact number is [taxi_phone] . can i help you with anything else today ?", # "user : thanks that's all for today ." 
# ]
# extractor = ValueExtractor()
# extractor.start()
# values = extractor.extract_value_candidates(dialog_history)
# extractor.stop()
# print('Extracted Values: ', values)
# \ No newline at end of file
"""Extract slot-value candidates from dialog history using Stanford CoreNLP.

Importing this module installs CoreNLP (and the 'english-extra' models when
the jar is missing) under ``CORENLP_DIR`` and sets ``CORENLP_HOME``.
"""
import stanza
import os
from pathlib import Path
from stanza.server import CoreNLPClient

STOPWORDS_FILE = "../data/resource/stopwords.txt"
CORENLP_DIR = str(Path().absolute()) + '/corenlp-dir'

# properties for the CoreNLP Server
ANNOTATORS = ['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'regexner']

# install/download the latest version of CoreNLP
stanza.install_corenlp(dir=CORENLP_DIR)
extra_model_jar = os.path.join(CORENLP_DIR, 'stanford-corenlp-4.5.1-models-english-extra.jar')
if not os.path.isfile(extra_model_jar):
    # download corenlp english models
    stanza.download_corenlp_models(model='english-extra', version='4.5.1', dir=CORENLP_DIR)
else:
    print('English Extra CoreNLP models available!')

# set environment var of installation location
os.environ["CORENLP_HOME"] = CORENLP_DIR

# surface forms that are normalised before being reported as candidates
VALUES_CONVERT = {
    'zero': '0',
    'one': '1',
    'two': '2',
    'three': '3',
    'four': '4',
    'five': '5',
    'six': '6',
    'seven': '7',
    'eight': '8',
    'nine': '9',
    'ten': '10',
    'not important': 'dont care',
    'not expensive': 'cheap'
}

# corenlp properties
properties = {
    'ner.applyFineGrained': False,
    'pos.model': 'edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger',
    'ner.model': 'edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,'
                 'edu/stanford/nlp/models/ner/english.muc.7class.caseless.distsim.crf.ser.gz,'
                 'edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz',
    'ner.additional.regexner.mapping': str(Path().absolute()) + '/regexner.rules'
}

DATE_TIME_NUM = {'TIME', 'DATE', 'NUMBER'}


class ValueExtractor:
    """Wrap a CoreNLP client and pull value candidates out of user utterances.

    Call :meth:`start` before and :meth:`stop` after extraction, or use the
    instance as a context manager (``with ValueExtractor() as extractor:``)
    so the server is stopped even when extraction raises.
    """

    def __init__(self):
        # create the CoreNLP client
        self.client = CoreNLPClient(
            annotators=ANNOTATORS,
            properties=properties,
            timeout=300000,
            memory='8G',
            threads=8,
            be_quiet=True
        )

        # load the stopwords.txt file (one stopword per line); the encoding is
        # explicit so the result does not depend on the platform default
        with open(STOPWORDS_FILE, 'r', encoding='utf-8') as file:
            self.stopwords = {line.strip() for line in file}

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()
        return False

    def start(self):
        """Start the underlying CoreNLP client."""
        self.client.start()

    def stop(self):
        """Stop the underlying CoreNLP client."""
        self.client.stop()

    @staticmethod
    def _join_user_utterances(history):
        """Concatenate the user turns of *history* into one annotatable text."""
        prefix = 'user :'
        parts = []
        for utterance in history:
            # consider only user utterances
            if not utterance.startswith(prefix):
                continue
            utterance = utterance[len(prefix):]
            # if sentence doesn't end with punctuation, add .
            if not utterance.endswith(('.', '?', '!')):
                utterance = utterance + '.'
            parts.append(utterance + ' ')
        return ''.join(parts)

    def extract_value_candidates(self, history):
        """Return the value candidates found in the user turns of *history*.

        :param history: iterable of utterance strings; only those starting
            with ``'user :'`` are annotated.
        :return: list of candidate strings - adjectives/adverbs, times and
            numbers first, then the day (if any), then named entities; an
            empty list when there is no user utterance.
        """
        user_utterance = self._join_user_utterances(history)
        if not user_utterance:
            return []

        value_candidates = []
        user_utterance = user_utterance.replace("'", "")

        # use corenlp client and annotate text
        annotation = self.client.annotate(user_utterance)

        date_day = ''
        for sent in annotation.sentence:
            prev_word = ''
            for token in sent.token:
                # TODO :: remove
                # print("{:12s}\t{:12s}\t{:6s}\t{}".format(token.word, token.lemma, token.pos, token.ner))
                word = token.word.strip()

                # extract Adjectives & Adverbs using POS tags
                # exclude the custom entity types
                if token.pos in ['JJ', 'RB'] \
                        and token.ner not in ['AREA', 'PLACE', 'PLACE_TYPE', 'FOOD_TYPE']:
                    # check if the word ends with 'ly'
                    word = word.removesuffix('ly')
                    if word not in self.stopwords:
                        negated = prev_word + ' ' + word
                        if prev_word == 'not' and negated in VALUES_CONVERT:
                            word = VALUES_CONVERT[negated]
                        # de-duplicate AFTER normalisation so a repeated
                        # "not important" does not add 'dont care' twice
                        if word not in value_candidates:
                            value_candidates.append(word)
                # NOTE(review): nesting was rebuilt from a whitespace-collapsed
                # source; prev_word is assumed to track every token - confirm.
                prev_word = word

                # extract day, time & numbers
                if token.ner in DATE_TIME_NUM:
                    if word in VALUES_CONVERT:
                        word = VALUES_CONVERT[word]
                    if token.ner == "DATE" and token.pos not in ["NNP", "CD"]:
                        continue
                    if token.ner == "DATE" and token.pos == "NNP":
                        date_day = word
                        continue
                    if word not in self.stopwords:
                        if token.ner == "TIME":
                            # slicing keeps an empty token from raising IndexError
                            if not word[:1].isdigit():
                                continue
                            # pad e.g. '9:30' to '09:30'
                            if len(word) == 4:
                                word = word.zfill(5)
                        if word in value_candidates:
                            continue
                        value_candidates.append(word)

        # add day/date to value candidates
        if date_day:
            value_candidates.append(date_day)

        entity_map = {}
        # extract named entities (place, area,...)
        for sent in annotation.sentence:
            for mention in sent.mentions:
                # TODO :: remove
                # print("{:30s}\t{}".format(mention.entityMentionText, mention.entityType))
                entity_text = mention.entityMentionText.strip()
                if mention.entityType not in {'TIME', 'DATE', 'NUMBER', 'DURATION'} \
                        and entity_text not in value_candidates:
                    if mention.entityType in ['AREA', 'FOOD_TYPE', 'PLACE_TYPE']:
                        # only the last mention of each of these types is kept
                        entity_map[mention.entityType] = entity_text
                    else:
                        value_candidates.append(entity_text)

        more_values = list(entity_map.values())
        return value_candidates + more_values


# sample test TODO:: remove
# dialog_history = [
#     "user : i need a taxi to go to mahal of cambridge",
#     "system : i can help with that . did you have a specific time in mind ?",
#     "user : yes, i'd like to leave after 2:30 please.",
#     "system : where would you like to depart from ?",
#     "user : i am departing from jesus green outdoor pool.",
#     "system : your taxi is booked . it is a [taxi_type] .",
#     "user : what is the contact number for the taxi?",
#     "system : the contact number is [taxi_phone] . can i help you with anything else today ?",
#     "user : thanks that's all for today ."
# ] # extractor = ValueExtractor() # extractor.start() # values = extractor.extract_value_candidates(dialog_history) # extractor.stop() # print('Extracted Values: ', values) \ No newline at end of file diff --git a/utils/create_dataset.py b/utils/create_dataset.py index 27bf8f1..699503f 100644 --- a/utils/create_dataset.py +++ b/utils/create_dataset.py @@ -39,7 +39,9 @@ CORRECTIONS = { "christs college": "christ college", "museums": "museum", "alexander": "alexander bed and breakfast", - "ian hong house": "lan hong house" + "ian hong house": "lan hong house", + "saint catharines college": "saint catherines college", + "gandhi": "the gandhi" } diff --git a/utils/regexner.rules b/utils/regexner.rules index 3676055..19521c1 100644 --- a/utils/regexner.rules +++ b/utils/regexner.rules @@ -43,7 +43,7 @@ stevenage train station PLACE parkside police station PLACE birmingham new street PLACE jesus green outdoor pool PLACE -sheep's green and lammas land park fen causeway PLACE +sheeps green and lammas land park fen causeway PLACE broxbourne PLACE broxbourne train station PLACE duxford PLACE @@ -57,7 +57,6 @@ huntingdon marriott hotel PLACE glastonbury PLACE city hall PLACE hughes hall PLACE -city centre north PLACE city centre north b and b PLACE cafe jello gallery PLACE the junction PLACE @@ -68,11 +67,11 @@ hobsons house PLACE funky fun house PLACE avalon PLACE regency gallery PLACE +ruskin gallery PLACE churchills college PLACE christs college PLACE holy trinity church PLACE cineworld cinema PLACE -hotel PLACE_TYPE guesthouse PLACE_TYPE architecture PLACE_TYPE boat PLACE_TYPE @@ -118,10 +117,13 @@ frankie and bennys PLACE kymmoy PLACE dojo noodle bar PLACE the bedouin PLACE +mumford theatre PLACE restaurant alimentum PLACE gourmet burger kitchen PLACE la margherita PLACE golden house PLACE +kirkwood house PLACE +warkworth house PLACE chiquito PLACE darrys cookhouse and wine shop PLACE scudamores punt PLACE @@ -149,6 +151,7 @@ rajmahal PLACE ali baba PLACE limehouse 
PLACE the grafton hotel PLACE +lensfield hotel PLACE barbakan PLACE sesame restaurant and bar PLACE golden wok PLACE @@ -179,7 +182,6 @@ european PLACE saffron brasserie PLACE gardenia PLACE de luca cucina and bar PLACE -two two PLACE ashley hotel PLACE the hotpot PLACE michaelhouse cafe PLACE @@ -224,6 +226,7 @@ sitar PLACE alex PLACE cambridge chop house PLACE cambridge arts theatre PLACE +cambridge and county folk museum PLACE the missing sock PLACE primavera PLACE the meze bar PLACE @@ -340,6 +343,7 @@ mexican FOOD_TYPE middle eastern FOOD_TYPE modern american FOOD_TYPE modern english FOOD_TYPE +modern european FOOD_TYPE moroccan FOOD_TYPE north african FOOD_TYPE north american FOOD_TYPE