diff --git a/utils/txt_preparation/__init__.py b/aligner/__init__.py similarity index 100% rename from utils/txt_preparation/__init__.py rename to aligner/__init__.py diff --git a/bitext_align.py b/aligner/bitext_align.py similarity index 97% rename from bitext_align.py rename to aligner/bitext_align.py index 5068a02..2970dcc 100644 --- a/bitext_align.py +++ b/aligner/bitext_align.py @@ -1,18 +1,13 @@ # -*- coding: utf-8 -*- -import os,sys -import re -import pandas as pd +from itertools import product as cp + import numpy as np -from numpy import cumsum -from pandas import DataFrame -from nltk import word_tokenize, sent_tokenize -#import xml.etree.ElementTree as ET -from jellyfish import levenshtein_distance as lev -#import six +import pandas as pd from google.cloud import translate_v2 as translate -from itertools import product as cp +from jellyfish import levenshtein_distance as lev +from nltk import sent_tokenize translate_client = translate.Client() diff --git a/xml_files/book_structure.xml b/book_structure.xml similarity index 100% rename from xml_files/book_structure.xml rename to book_structure.xml diff --git a/run.py b/run.py index 5be3377..0a0c5aa 100644 --- a/run.py +++ b/run.py @@ -17,10 +17,10 @@ def save_validated_files_to_db(): books_list = books_json[book_code] for book in books_list: if not book['is_validated']: - print(const.WARNING, 'Book : ', book['xml_file'], ' is not validated against XSD', const.END) + print(const.WARNING, 'XML File :: ', book['xml_file'], ' is not validated against XSD', const.END) continue if not book['is_saved_to_db']: - print(const.BLUE, 'Adding Book : ', book['xml_file'], ' to the DB', const.END) + print(const.BLUE, 'Adding Book Data : ', book['xml_file'], ' to the DB', const.END) book_dict = read_xml.parse_xml_file(book['xml_file_path']) result = adb.add_book_to_db(book_code, book_dict) book['is_saved_to_db'] = result @@ -35,4 +35,4 @@ def save_validated_files_to_db(): if env.check_env_variables(): validate_all_xml_files() - save_validated_files_to_db() \ No newline at end of file + # save_validated_files_to_db() \ No newline at end of file diff --git a/txt_parser/__init__.py b/txt_parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/txt_preparation/txt_cleaner.py b/txt_parser/txt_cleaner.py similarity index 100% rename from utils/txt_preparation/txt_cleaner.py rename to txt_parser/txt_cleaner.py diff --git a/utils/constants.py b/utils/constants.py index f8c4860..9d91590 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -1,6 +1,6 @@ JSON_PATH = 'json/books.json' -XSD_PATH = 'xml_files/book.xsd' +XSD_PATH = 'xml_parser/book.xsd' TRANSLATE_ENV_VAR = 'GOOGLE_APPLICATION_CREDENTIALS' diff --git a/xml_files/book.xsd b/xml_parser/book.xsd similarity index 100% rename from xml_files/book.xsd rename to xml_parser/book.xsd