From 7ac62ab66d939452d88c4a1b74d94420ac64a772 Mon Sep 17 00:00:00 2001 From: Isaac Riley Date: Sun, 17 May 2020 20:27:26 +0200 Subject: [PATCH] Use pattern.search instead of pattern.match for regex features; restrict ACRONYM trailing delimiters --- feature_extraction/features.py | 2 +- utils/constants.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/feature_extraction/features.py b/feature_extraction/features.py index d07cc26..3b2e360 100644 --- a/feature_extraction/features.py +++ b/feature_extraction/features.py @@ -33,7 +33,7 @@ def extract_features_from_text(text: str): # and match it with the input text if feature in REGEX_CONSTANTS: pattern = REGEX_CONSTANTS[feature] - if bool(pattern.match(text)): + if bool(pattern.search(text)): text_feature_list.append(feature) continue diff --git a/utils/constants.py b/utils/constants.py index 6d8811b..7f983f9 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -7,7 +7,7 @@ AVG_MACRO = 'MACRO' REGEX_CONSTANTS = { # Regex for matching Acronym Patterns -> COVID-19 / SEKA / SMY2 / EAP1 / SCP16 / ASC1 / DENV-2 - 'ACRONYM': re.compile(r"[m0-9\W]([A-Z]{2,})[s0-9\W]"), + 'ACRONYM': re.compile(r"[m0-9\W^]([A-Z]{2,})[s\.,:\-$]"), # Regex for matching Years in the text - > 1995 / 2020 / 2019 'CONTAINS_YEAR': re.compile(r"(?<=[^0-9])1[8-9][0-9]{2}(?=[^0-9$])|(?<=[^0-9])20[0-2][0-9](?=[^0-9$])"),