Use stanza corenlp library for better value extraction

main
Pavan Mandava 3 years ago
parent 47c44b99f8
commit 0856378436

6
.gitignore vendored

@ -182,5 +182,7 @@ ipython_config.py
# runs folder # runs folder
baseline/runs/ baseline/runs/
.local .local
utils/stanford-corenlp utils/corenlp-dir
prompt-learning/corenlp-dir
corenlp-dir
*.props

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -4,6 +4,7 @@ yeas
perfect perfect
sounds sounds
long long
dont
dont' dont'
nope nope
then then
@ -19,6 +20,7 @@ help
pre pre
fine fine
ly ly
-
! !
, ,
. .
@ -53,9 +55,12 @@ a's
able able
about about
above above
absolute
absolutely
according according
accordingly accordingly
across across
actual
actually actually
after after
afterwards afterwards
@ -103,6 +108,7 @@ available
away away
awfully awfully
b b
bad
be be
became became
because because
@ -174,6 +180,8 @@ downwards
during during
e e
each each
ear
early
edu edu
eg eg
eight eight
@ -193,6 +201,7 @@ everyone
everything everything
everywhere everywhere
ex ex
exact
exactly exactly
example example
except except
@ -206,11 +215,14 @@ followed
following following
follows follows
for for
fore
former former
formerly formerly
forth forth
four four
from from
fun
funky
further further
furthermore furthermore
g g
@ -230,6 +242,7 @@ h
had had
hadn't hadn't
happens happens
hard
hardly hardly
has has
hasn't hasn't
@ -255,6 +268,8 @@ him
himself himself
his his
hither hither
ho
hopeful
hopefully hopefully
how how
howbeit howbeit
@ -299,6 +314,7 @@ known
knows knows
l l
last last
late
lately lately
later later
latter latter
@ -317,6 +333,7 @@ looking
looks looks
ltd ltd
m m
main
mainly mainly
many many
may may
@ -324,6 +341,7 @@ maybe
me me
mean mean
meanwhile meanwhile
mere
merely merely
might might
more more
@ -335,6 +353,7 @@ must
my my
myself myself
n n
n't
name name
namely namely
nd nd
@ -351,12 +370,14 @@ next
night night
nights nights
nine nine
nt
no no
nobody nobody
non non
none none
noone noone
nor nor
normal
normally normally
not not
nothing nothing
@ -365,6 +386,7 @@ now
nowhere nowhere
number number
o o
obvious
obviously obviously
of of
off off
@ -402,9 +424,16 @@ placed
please please
plus plus
possible possible
postcode
preferab
preferably
presumab
presumably presumably
price price
probab
probably probably
proper
properly
provides provides
q q
que que
@ -417,12 +446,16 @@ ratings
range range
rd rd
re re
real
really really
reasonab
reasonably reasonably
regarding regarding
regardless regardless
regards regards
relative
relatively relatively
respective
respectively respectively
right right
s s
@ -456,6 +489,7 @@ shouldn't
since since
six six
so so
social
some some
somebody somebody
somehow somehow
@ -467,6 +501,7 @@ somewhat
somewhere somewhere
soon soon
sorry sorry
specific
specified specified
specify specify
specifying specifying
@ -532,6 +567,7 @@ toward
towards towards
tried tried
tries tries
tru
truly truly
try try
trying trying
@ -540,8 +576,10 @@ two
u u
un un
under under
unfortunate
unfortunately unfortunately
unless unless
unlike
unlikely unlikely
until until
unto unto
@ -553,6 +591,7 @@ used
useful useful
uses uses
using using
usual
usually usually
uucp uucp
v v
@ -611,6 +650,7 @@ within
without without
won't won't
wonder wonder
wonderful
would would
wouldn't wouldn't
x x
@ -634,4 +674,3 @@ restaurant
attraction attraction
taxi taxi
book book

@ -1,6 +1,7 @@
import json import json
import os import os
import numpy as np import numpy as np
import collections
class PromptDstDataset: class PromptDstDataset:
@ -75,3 +76,24 @@ class PromptDstDataset:
def total_slot_value_pairs(self): def total_slot_value_pairs(self):
return self.total_num_slot_value_pairs return self.total_num_slot_value_pairs
def compute_value_extraction_accuracy(self):
    """Print how well the extracted value candidates match the true belief-state values.

    For every item in ``self.dataset_items`` the multiset of ``item['values']``
    is compared with the multiset of values taken from the pairs in
    ``item['belief_states']``.

    Two percentages are printed:
      * 'Accuracy'            - share of items whose two multisets are identical,
                                relative to ``self.len()``.
      * 'Slot-Value Accuracy' - number of true values whose count is matched
                                exactly by the extracted values, relative to
                                ``self.total_slot_value_pairs()``.

    Items that do not match exactly are printed for inspection.
    When a denominator is zero (e.g. an empty dataset) the corresponding
    percentage is reported as 0.0 instead of raising ZeroDivisionError.

    Returns:
        None. Results are only printed.
    """
    # iterate through data items list and extract values
    correct_values, correct_turns = 0, 0
    for item in self.dataset_items:
        extracted_values = collections.Counter(item['values'])
        true_values = collections.Counter(value for _, value in item['belief_states'])

        if extracted_values == true_values:
            correct_turns += 1
        else:
            # show the mismatch so wrong extractions can be inspected
            print('Extracted: ', extracted_values.keys())
            print('True Values: ', true_values.keys())
            print("")

        # NOTE(review): the original indentation was lost; this loop is assumed
        # to run for every item (not only for mismatching ones) - confirm.
        for key, true_count in true_values.items():
            # a value only counts when it was extracted exactly as often as it occurs
            if extracted_values.get(key) == true_count:
                correct_values += true_count

    # guard the denominators so an empty dataset does not crash the report
    total_turns = self.len()
    total_pairs = self.total_slot_value_pairs()
    turn_accuracy = (correct_turns / total_turns) * 100 if total_turns else 0.0
    value_accuracy = (correct_values / total_pairs) * 100 if total_pairs else 0.0

    print('Accuracy :: ', turn_accuracy)
    print('Slot-Value Accuracy :: ', value_accuracy)

@ -2,4 +2,4 @@ torch==1.13.0
numpy==1.23.4 numpy==1.23.4
transformers==4.23.1 transformers==4.23.1
tqdm==4.64.1 tqdm==4.64.1
stanfordcorenlp==3.9.1.1 stanza==1.4.2

File diff suppressed because one or more lines are too long

@ -1,13 +1,11 @@
import collections
import json import json
import os import os
from pathlib import Path from pathlib import Path
from stanfordcorenlp import StanfordCoreNLP from tqdm.auto import tqdm
from corenlp import get_value_candidates_from_history from corenlp import ValueExtractor
from collections import Counter
BELIEF_PREFIX = 'belief :' BELIEF_PREFIX = 'belief :'
CORENLP_PATH = "../utils/stanford-corenlp"
MODIFIED_SLOTS = { MODIFIED_SLOTS = {
'area': 'area', 'area': 'area',
@ -28,7 +26,20 @@ MODIFIED_SLOTS = {
'type': 'type' 'type': 'type'
} }
max_len = 0 CORRECTIONS = {
"pizza hut fenditton": "pizza hut fen ditton",
"saint johns chop house": "saint johns chop shop house",
"1515": "15:15",
"center": "centre",
"pizza express fen ditton": "pizza hut fen ditton",
"apha-milton": "alpha-milton guest house",
"concerthall": "concert hall",
"oak bistro": "the oak bistro",
"nightclub": "night club",
"christs college": "christ college",
"museums": "museum",
"alexander": "alexander bed and breakfast"
}
def convert_slot_for_prompting(slot_value_item): def convert_slot_for_prompting(slot_value_item):
@ -48,6 +59,10 @@ def convert_slot_for_prompting(slot_value_item):
# modify the slot for prompting # modify the slot for prompting
modified_slot = MODIFIED_SLOTS[slot] modified_slot = MODIFIED_SLOTS[slot]
# correct the value (if required)
if value in CORRECTIONS:
value = CORRECTIONS[value]
# compose the 'slot = value' string from modified slot and return # compose the 'slot = value' string from modified slot and return
return modified_slot + ' = ' + value return modified_slot + ' = ' + value
@ -64,11 +79,14 @@ def create_belief_states_data_for_prompt_learning(data_tuple):
if len(data) <= 0: if len(data) <= 0:
return return
nlp = None progress = tqdm(total=len(data), desc="Creating Dataset ("+data_tuple[2]+")", leave=False)
extractor = None
# start the CoreNLP server for Value Extraction # start the CoreNLP server for Value Extraction
# Only required for test/valid dataset # Only required for test/valid dataset
if data_tuple[1] in ['test', 'valid']: if data_tuple[1] in ['test', 'valid']:
nlp = StanfordCoreNLP(CORENLP_PATH, memory='8g') extractor = ValueExtractor()
extractor.start()
# data to be saved for prompt learning # data to be saved for prompt learning
belief_states_dataset = [] belief_states_dataset = []
@ -76,13 +94,16 @@ def create_belief_states_data_for_prompt_learning(data_tuple):
for item in data: for item in data:
# map to be added to the list for saving # map to be added to the list for saving
belief_states_data_item = {} belief_states_data_item = {}
# add the history & domains of dialog to the data item # add the history & domains of dialog to the data item
belief_states_data_item['history'] = item['history'] belief_states_data_item['history'] = item['history']
belief_states_data_item['domains'] = item['domains'] belief_states_data_item['domains'] = item['domains']
# extract value candidates using stanford CoreNLP & add to test/valid dataset # extract value candidates using stanford CoreNLP & add to test/valid dataset
if data_tuple[1] in ['test', 'valid']: if data_tuple[1] in ['test', 'valid']:
belief_states_data_item['values'] = get_value_candidates_from_history(nlp, item['history']) values = extractor.extract_value_candidates(item['history'])
correct_values = [CORRECTIONS[value] if value in CORRECTIONS else value for value in values]
belief_states_data_item['values'] = correct_values
# extract belief states # extract belief states
belief_states = item['belief'] belief_states = item['belief']
@ -127,11 +148,18 @@ def create_belief_states_data_for_prompt_learning(data_tuple):
# add belief states list to data item map (will be saved) # add belief states list to data item map (will be saved)
belief_states_data_item['belief_states'] = belief_slot_value_list belief_states_data_item['belief_states'] = belief_slot_value_list
# update tqdm progress
progress.update(1)
# add to the dataset (to be saved!) # add to the dataset (to be saved!)
belief_states_dataset.append(belief_states_data_item) belief_states_dataset.append(belief_states_data_item)
if data_tuple[1] in ['test', 'valid'] and nlp is not None: # close tqdm progress
nlp.close() progress.close()
# stop CoreNLP server
if data_tuple[1] in ['test', 'valid'] and extractor is not None:
extractor.stop()
# save the dataset file # save the dataset file
save_file_path = '../data/prompt-learning/' + data_tuple[2] + '/' save_file_path = '../data/prompt-learning/' + data_tuple[2] + '/'
save_file_name = data_tuple[1] + '.soloist.json' save_file_name = data_tuple[1] + '.soloist.json'
@ -145,6 +173,8 @@ def create_belief_states_data_for_prompt_learning(data_tuple):
data_list = [ data_list = [
("../data/baseline/test/test.soloist.json", "test", "test"), ("../data/baseline/test/test.soloist.json", "test", "test"),
("../data/baseline/valid/valid.soloist.json", "valid", "valid"), ("../data/baseline/valid/valid.soloist.json", "valid", "valid"),
("../data/baseline/5-dpd/train.soloist.json", "train", "5-dpd"),
("../data/baseline/10-dpd/train.soloist.json", "train", "10-dpd"),
("../data/baseline/50-dpd/train.soloist.json", "train", "50-dpd"), ("../data/baseline/50-dpd/train.soloist.json", "train", "50-dpd"),
("../data/baseline/100-dpd/train.soloist.json", "train", "100-dpd"), ("../data/baseline/100-dpd/train.soloist.json", "train", "100-dpd"),
("../data/baseline/125-dpd/train.soloist.json", "train", "125-dpd"), ("../data/baseline/125-dpd/train.soloist.json", "train", "125-dpd"),

@ -0,0 +1,372 @@
east AREA
west AREA
north AREA
south AREA
centre AREA
center AREA
london kings cross PLACE
london kings cross train station PLACE
huntingdon PLACE
ely PLACE
hamilton lodge PLACE
wandlebury country park PLACE
stevenage PLACE
huntington marriott PLACE
brookshite PLACE
cineworld PLACE
stansted airport PLACE
stansted airport train station PLACE
bournemouth PLACE
norwich PLACE
gourmet burger kitchen PLACE
aylesbray lodge guest PLACE
stratford PLACE
london liverpool street PLACE
copper kettle PLACE
kettles yard PLACE
kings lynn PLACE
liverpool PLACE
leicester PLACE
cafe uno PLACE
curry prince PLACE
london PLACE
peterborough PLACE
east london PLACE
birmingham new street PLACE
liverpool street PLACE
london liverpool PLACE
clare college PLACE
camboats PLACE
stevenage train station PLACE
parkside police station PLACE
birmingham new street PLACE
jesus green outdoor pool PLACE
sheep's green and lammas land park fen causeway PLACE
broxbourne PLACE
broxbourne train station PLACE
duxford PLACE
bishops stortford PLACE
alpha milton PLACE
alpha-milton PLACE
alpha-milton guest house PLACE
apha-milton PLACE
norway PLACE
huntingdon marriott hotel PLACE
glastonbury PLACE
city hall PLACE
hughes hall PLACE
city centre north PLACE
city centre north b and b PLACE
cafe jello gallery PLACE
the junction PLACE
riverboat georgina PLACE
pizza hut fen ditton PLACE
panahar PLACE
hobsons house PLACE
funky fun house PLACE
avalon PLACE
regency gallery PLACE
churchills college PLACE
christs college PLACE
holy trinity church PLACE
cineworld cinema PLACE
hotel PLACE_TYPE
guesthouse PLACE_TYPE
architecture PLACE_TYPE
boat PLACE_TYPE
boating PLACE_TYPE
church PLACE_TYPE
cinema PLACE_TYPE
cinemas PLACE_TYPE
college PLACE_TYPE
concert hall PLACE_TYPE
concerthall PLACE_TYPE
entertainment PLACE_TYPE
gallery PLACE_TYPE
gastropub PLACE_TYPE
hiking PLACE_TYPE
historical PLACE_TYPE
hotel PLACE_TYPE
multiple sports PLACE_TYPE
museum PLACE_TYPE
museums PLACE_TYPE
night club PLACE_TYPE
nightclub PLACE_TYPE
outdoor PLACE_TYPE
park PLACE_TYPE
pool PLACE_TYPE
special PLACE_TYPE
sports PLACE_TYPE
swimming pool PLACE_TYPE
theater PLACE_TYPE
theatre PLACE_TYPE
theatres PLACE_TYPE
copper kettle PLACE
taj tandoori PLACE
charlie chan PLACE
one seven PLACE
eraina and michaelhouse cafe PLACE
rosas bed and breakfast PLACE
mahal of cambridge PLACE
restaurant two two PLACE
curry prince PLACE
golden curry PLACE
missing sock PLACE
frankie and bennys PLACE
kymmoy PLACE
dojo noodle bar PLACE
the bedouin PLACE
restaurant alimentum PLACE
gourmet burger kitchen PLACE
la margherita PLACE
golden house PLACE
chiquito PLACE
darrys cookhouse and wine shop PLACE
scudamores punt PLACE
eraina PLACE
the varsity restaurant PLACE
pizza hut fenditton PLACE
saint johns chop house PLACE
saint johns chop shop house PLACE
curry garden PLACE
sala thong PLACE
bangkok city PLACE
jinling noodle bar PLACE
sitar tandoori PLACE
pizza hut PLACE
the nirala PLACE
city stop restaurant PLACE
de luca cucina and bar riverside brasserie PLACE
rice boat PLACE
wise buddha PLACE
restaurant one seven PLACE
meze bar restaurant PLACE
yipee noodle bar PLACE
little seoul PLACE
rajmahal PLACE
ali baba PLACE
limehouse PLACE
the grafton hotel PLACE
barbakan PLACE
sesame restaurant and bar PLACE
golden wok PLACE
pizza hut city centre PLACE
binh PLACE
hotel du vin and bistro PLACE
lan hong house PLACE
tandoori palace PLACE
meghna PLACE
alimentum PLACE
the oak bistro PLACE
anatolia PLACE
ian hong house PLACE
two two and cote PLACE
curry queen PLACE
backstreet bistro PLACE
mahal PLACE
la mimosa PLACE
shanghai family restaurant PLACE
molecular gastronomy PLACE
efes PLACE
cote PLACE
good luck PLACE
cafe uno PLACE
oak bistro PLACE
european PLACE
saffron brasserie PLACE
gardenia PLACE
de luca cucina and bar PLACE
two two PLACE
ashley hotel PLACE
the hotpot PLACE
michaelhouse cafe PLACE
yu garden PLACE
gourmet formal kitchen PLACE
lovel PLACE
efes restaurant PLACE
the peking restaurant PLACE
river bar steakhouse and grill PLACE
the slug and lettuce PLACE
yippee noodle bar PLACE
meze bar PLACE
rice house PLACE
clowns cafe PLACE
hobsons house PLACE
dojo noodle bar PLACE
the kohinoor PLACE
royal standard PLACE
bloomsbury restaurant PLACE
graffiti PLACE
el shaddia PLACE
el shaddia guesthouse PLACE
a and b guest house PLACE
gonville hotel PLACE
worth house PLACE
shiraz PLACE
the golden house PLACE
shanghai PLACE
don pasquale pizzeria PLACE
tandoori palace PLACE
autumn house PLACE
panahar PLACE
the peking PLACE
wagamama PLACE
tandoori PLACE
nusha PLACE
gandhi PLACE
the gandhi PLACE
nandos city centre PLACE
slug and lettuce PLACE
sitar PLACE
alex PLACE
cambridge chop house PLACE
cambridge arts theatre PLACE
the missing sock PLACE
primavera PLACE
the meze bar PLACE
travellers rest PLACE
curry king PLACE
pipasha restaurant PLACE
cambridge punter PLACE
saigon city PLACE
bedouin PLACE
pizza express PLACE
broughton house gallery PLACE
williams art and antiques PLACE
tang chinese PLACE
curry prince PLACE
charlie PLACE
nirala PLACE
nandos PLACE
cotto PLACE
parkside pools PLACE
galleria PLACE
adden PLACE
funky PLACE
hamilton lodge PLACE
maharajah tandoori restaurant PLACE
the cow pizza kitchen and bar PLACE
grafton hotel PLACE
cow pizza kitchen and bar PLACE
midsummer house restaurant PLACE
the lucky star PLACE
prezzo PLACE
cambridge lodge restaurant PLACE
sala thong PLACE
Kohinoor PLACE
zizzi cambridge PLACE
pizza hut cherry hinton PLACE
4 kings parade city centre PLACE
golden wok PLACE
nus PLACE
fitzbillies restaurant PLACE
lucky star PLACE
la tasca PLACE
loch fyne PLACE
the gardenia PLACE
chiquito restaurant bar PLACE
anatolia and efes restaurant PLACE
ugly duckling PLACE
cocum PLACE
hk fusion PLACE
stazione restaurant and coffee bar PLACE
restaurant 22 PLACE
grafton hotel restaurant PLACE
the maharajah tandoor PLACE
the alex PLACE
thanh binh PLACE
the river bar steakhouse and grill PLACE
india house PLACE
peking restaurant PLACE
hotpot PLACE
kohinoor PLACE
la raza PLACE
da vinci pizzeria PLACE
pizza hut cherry hinton PLACE
alimentum PLACE
royal spice PLACE
riverside brasserie PLACE
kitchen and bar PLACE
whale of a time PLACE
afghan FOOD_TYPE
african FOOD_TYPE
american FOOD_TYPE
asian FOOD_TYPE
asian oriental FOOD_TYPE
australian FOOD_TYPE
austrian FOOD_TYPE
barbeque FOOD_TYPE
basque FOOD_TYPE
belgian FOOD_TYPE
bistro FOOD_TYPE
brazilian FOOD_TYPE
british FOOD_TYPE
canapes FOOD_TYPE
cantonese FOOD_TYPE
caribbean FOOD_TYPE
catalan FOOD_TYPE
chinese FOOD_TYPE
corsica FOOD_TYPE
crossover FOOD_TYPE
cuban FOOD_TYPE
danish FOOD_TYPE
eastern european FOOD_TYPE
english FOOD_TYPE
eritrean FOOD_TYPE
european FOOD_TYPE
french FOOD_TYPE
german FOOD_TYPE
greek FOOD_TYPE
halal FOOD_TYPE
hungarian FOOD_TYPE
indian FOOD_TYPE
indonesian FOOD_TYPE
international FOOD_TYPE
irish FOOD_TYPE
italian FOOD_TYPE
jamaican FOOD_TYPE
japanese FOOD_TYPE
korean FOOD_TYPE
kosher FOOD_TYPE
latin american FOOD_TYPE
lebanese FOOD_TYPE
malaysian FOOD_TYPE
mediterranean FOOD_TYPE
mexican FOOD_TYPE
middle eastern FOOD_TYPE
modern american FOOD_TYPE
modern english FOOD_TYPE
moroccan FOOD_TYPE
north african FOOD_TYPE
north american FOOD_TYPE
north indian FOOD_TYPE
northern european FOOD_TYPE
panasian FOOD_TYPE
persian FOOD_TYPE
polish FOOD_TYPE
polynesian FOOD_TYPE
portugese FOOD_TYPE
portuguese FOOD_TYPE
romanian FOOD_TYPE
russian FOOD_TYPE
scandinavian FOOD_TYPE
scottish FOOD_TYPE
seafood FOOD_TYPE
singaporean FOOD_TYPE
south african FOOD_TYPE
south indian FOOD_TYPE
spanish FOOD_TYPE
sri lankan FOOD_TYPE
sushi FOOD_TYPE
swedish FOOD_TYPE
swiss FOOD_TYPE
thai FOOD_TYPE
thai and chinese FOOD_TYPE
traditional american FOOD_TYPE
turkish FOOD_TYPE
tuscan FOOD_TYPE
vegetarian FOOD_TYPE
venetian FOOD_TYPE
vietnamese FOOD_TYPE
welsh FOOD_TYPE
dont care MISC
Loading…
Cancel
Save