From b0a1359401f7b34ab46045b88ee6974e0f395652 Mon Sep 17 00:00:00 2001 From: Isaac Riley Date: Wed, 29 Jan 2020 18:19:38 +0100 Subject: [PATCH] added small cleaning step --- aligner/bitext_align.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aligner/bitext_align.py b/aligner/bitext_align.py index 091c5c1..c9a086a 100644 --- a/aligner/bitext_align.py +++ b/aligner/bitext_align.py @@ -20,6 +20,8 @@ translate_client = translate.Client() def master_align(text0, text1, lang0, lang1): """ Takes two equivalent texts (original and trnslation) and returns aligned texts. """ + text0 = re.sub(r' ?\. \. \. ?| … ?| ?\.\.\. ?', '… ', text0) + text1 = re.sub(r' ?\. \. \. ?| … ?| ?\.\.\. ?', '… ', text1) df0 = frame_from_text(text0, lang0, lang1) # print('A') df1 = frame_from_text(text1, lang1, lang0, is1=True)