From 1b51b22fdcc2a66744521598b941dc4d12982ae6 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Thu, 23 Jan 2020 20:08:37 +0100 Subject: [PATCH] Added csv read/write to run.py, TODOs added README files for json and xml_files folder --- json/README.md | 2 ++ run.py | 53 ++++++++++++++++++++++++++++++++++++++++- txt_parser/csv_utils.py | 4 ++-- txt_parser/test_txt.py | 4 ++-- xml_files/README.md | 2 ++ 5 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 json/README.md create mode 100644 xml_files/README.md diff --git a/json/README.md b/json/README.md new file mode 100644 index 0000000..f75e193 --- /dev/null +++ b/json/README.md @@ -0,0 +1,2 @@ +This Folder should only be used to save **books.json**.
+Don't manipulate JSON by hand, the code handles everything \ No newline at end of file diff --git a/run.py b/run.py index f6d4028..02402c9 100644 --- a/run.py +++ b/run.py @@ -5,6 +5,7 @@ import utils.json_utils as json_utils import utils.constants as const import utils.env_utils as env import xml_parser.create_xml as create_xml +import txt_parser.csv_utils as csv_utils from csv2df import get_book_content, get_book_metadata @@ -35,11 +36,61 @@ def save_validated_files_to_db(): json_utils.write_json_file(const.JSON_PATH, json_data) +def read_data_files_and_align_sentences(book_code): + books_list = csv_utils.read_books_csv_file(const.CSV_FILE) + books_dict = {} + for book in books_list: + if book[1] not in books_dict: + books_dict[book[1]] = [] + books_dict[book[1]].append(book) + + if book_code in books_dict: + book_code_list = books_dict[book_code] + + for book in book_code_list: + book_lines = csv_utils.read_data_file(book[3].strip()) + # TODO (for Jassi) :: Take this 'book_lines' and return dictionary after parsing chapters + # TODO :: Please Follow the below Dictionary Structure, == + # Later Isaac will use this dict structure to align sentences + # book_dict = { + # 'meta_data': { + # "book_id": "", + # "title": "", + # "lang": "", + # "isTranslation": "", + # "totalChapters": "", + # "authors": [ + # { + # "name": "", + # "translator": "" + # }, + # { + # "name": "" + # } + # ], + # "description": "", # Optional + # "source": "" + # }, + # 'content' : [ + # { + # 'chapter_num': '', + # 'chapter_name': '', + # 'text_content': '' + # }, + # { + # 'chapter_num': '', + # 'chapter_name': '', + # 'text_content': '' + # } + # ] + # } + + def create_xml_file(book_content_dict, book_metadata_dict): create_xml.create_xml_file(book_content_dict, book_metadata_dict) if env.check_env_variables(): - create_xml_file(get_book_content(), get_book_metadata()) + read_data_files_and_align_sentences('dost_cap') # validate_all_xml_files() # save_validated_files_to_db() diff --git a/txt_parser/csv_utils.py b/txt_parser/csv_utils.py index acacf4a..7e1353d 100644 --- a/txt_parser/csv_utils.py +++ b/txt_parser/csv_utils.py @@ -29,8 +29,8 @@ def write_books_data_to_csv(csv_file_name, books_list): writer.writerow(book) -def read_data_file(file_path): - txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_path +def read_data_file(file_name): + txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_name with open(txt_file_path, 'r') as file: lines = file.readline() file.close() diff --git a/txt_parser/test_txt.py b/txt_parser/test_txt.py index b4a27e5..a9f45a8 100644 --- a/txt_parser/test_txt.py +++ b/txt_parser/test_txt.py @@ -5,6 +5,6 @@ books_list = read_csv.read_books_csv_file(const.CSV_FILE) for book in books_list: print(book) - print(type(book)) -read_csv.write_books_data_to_csv(const.CSV_FILE, books_list) \ No newline at end of file + +# read_csv.write_books_data_to_csv(const.CSV_FILE, books_list) diff --git a/xml_files/README.md b/xml_files/README.md new file mode 100644 index 0000000..62b82ec --- /dev/null +++ b/xml_files/README.md @@ -0,0 +1,2 @@ +This Folder should only be used to save **XML Files**.
+Don't manipulate any XML File by hand \ No newline at end of file