From 02bffc0132b06a868623a7b9e9c4977cf260e8f8 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Wed, 22 Jan 2020 23:49:32 +0100 Subject: [PATCH] Project Structure changed --- {utils/txt_preparation => aligner}/__init__.py | 0 bitext_align.py => aligner/bitext_align.py | 15 +++++---------- .../book_structure.xml => book_structure.xml | 0 run.py | 6 +++--- txt_parser/__init__.py | 0 .../txt_preparation => txt_parser}/txt_cleaner.py | 0 utils/constants.py | 2 +- {xml_files => xml_parser}/book.xsd | 0 8 files changed, 9 insertions(+), 14 deletions(-) rename {utils/txt_preparation => aligner}/__init__.py (100%) rename bitext_align.py => aligner/bitext_align.py (97%) rename xml_files/book_structure.xml => book_structure.xml (100%) create mode 100644 txt_parser/__init__.py rename {utils/txt_preparation => txt_parser}/txt_cleaner.py (100%) rename {xml_files => xml_parser}/book.xsd (100%) diff --git a/utils/txt_preparation/__init__.py b/aligner/__init__.py similarity index 100% rename from utils/txt_preparation/__init__.py rename to aligner/__init__.py diff --git a/bitext_align.py b/aligner/bitext_align.py similarity index 97% rename from bitext_align.py rename to aligner/bitext_align.py index 5068a02..2970dcc 100644 --- a/bitext_align.py +++ b/aligner/bitext_align.py @@ -1,18 +1,13 @@ # -*- coding: utf-8 -*- -import os,sys -import re -import pandas as pd +from itertools import product as cp + import numpy as np -from numpy import cumsum -from pandas import DataFrame -from nltk import word_tokenize, sent_tokenize -#import xml.etree.ElementTree as ET -from jellyfish import levenshtein_distance as lev -#import six +import pandas as pd from google.cloud import translate_v2 as translate -from itertools import product as cp +from jellyfish import levenshtein_distance as lev +from nltk import sent_tokenize translate_client = translate.Client() diff --git a/xml_files/book_structure.xml b/book_structure.xml similarity index 100% rename from xml_files/book_structure.xml rename to 
book_structure.xml diff --git a/run.py b/run.py index 5be3377..0a0c5aa 100644 --- a/run.py +++ b/run.py @@ -17,10 +17,10 @@ def save_validated_files_to_db(): books_list = books_json[book_code] for book in books_list: if not book['is_validated']: - print(const.WARNING, 'Book : ', book['xml_file'], ' is not validated against XSD', const.END) + print(const.WARNING, 'XML File :: ', book['xml_file'], ' is not validated against XSD', const.END) continue if not book['is_saved_to_db']: - print(const.BLUE, 'Adding Book : ', book['xml_file'], ' to the DB', const.END) + print(const.BLUE, 'Adding Book Data : ', book['xml_file'], ' to the DB', const.END) book_dict = read_xml.parse_xml_file(book['xml_file_path']) result = adb.add_book_to_db(book_code, book_dict) book['is_saved_to_db'] = result @@ -35,4 +35,4 @@ def save_validated_files_to_db(): if env.check_env_variables(): validate_all_xml_files() - save_validated_files_to_db() \ No newline at end of file + # save_validated_files_to_db() \ No newline at end of file diff --git a/txt_parser/__init__.py b/txt_parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/txt_preparation/txt_cleaner.py b/txt_parser/txt_cleaner.py similarity index 100% rename from utils/txt_preparation/txt_cleaner.py rename to txt_parser/txt_cleaner.py diff --git a/utils/constants.py b/utils/constants.py index f8c4860..9d91590 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -1,6 +1,6 @@ JSON_PATH = 'json/books.json' -XSD_PATH = 'xml_files/book.xsd' +XSD_PATH = 'xml_parser/book.xsd' TRANSLATE_ENV_VAR = 'GOOGLE_APPLICATION_CREDENTIALS' diff --git a/xml_files/book.xsd b/xml_parser/book.xsd similarity index 100% rename from xml_files/book.xsd rename to xml_parser/book.xsd