From ea24fa832521facc070a7eab122b9ecf7b055769 Mon Sep 17 00:00:00 2001 From: Pavan Mandava Date: Thu, 16 Jan 2020 01:03:11 +0100 Subject: [PATCH] Added JSON for saving XML Book files data and some improvements --- csv2df.py | 2 +- {xml => xml_files}/book_structure.xml | 0 xml_parser/__init__.py | 12 +++++++ create_xml.py => xml_parser/create_xml.py | 39 ++++++++++++++++++++--- test.py => xml_parser/test.py | 2 +- 5 files changed, 48 insertions(+), 7 deletions(-) rename {xml => xml_files}/book_structure.xml (100%) create mode 100644 xml_parser/__init__.py rename create_xml.py => xml_parser/create_xml.py (65%) rename test.py => xml_parser/test.py (69%) diff --git a/csv2df.py b/csv2df.py index 43a2518..d3fcd66 100644 --- a/csv2df.py +++ b/csv2df.py @@ -13,7 +13,7 @@ def get_book_content(): ch_id = row['chapter'] s_id = row['sentence'] text = row['text'] - print(ch_id, " -> ", s_id, " -> ", text) + # print(ch_id, " -> ", s_id, " -> ", text) if ch_id not in book_dict: book_dict[ch_id] = [] diff --git a/xml/book_structure.xml b/xml_files/book_structure.xml similarity index 100% rename from xml/book_structure.xml rename to xml_files/book_structure.xml diff --git a/xml_parser/__init__.py b/xml_parser/__init__.py new file mode 100644 index 0000000..4677764 --- /dev/null +++ b/xml_parser/__init__.py @@ -0,0 +1,12 @@ +from pathlib import Path +import json + +json_file_path = Path('json/books.json') + +json_data = {'books': []} +if not json_file_path.is_file(): + json_file = open(json_file_path, 'w') + json_file.write(json.dumps(json_data, indent=4)) + json_file.close() + print('JSON File Created :: '+json_file.name) + diff --git a/create_xml.py b/xml_parser/create_xml.py similarity index 65% rename from create_xml.py rename to xml_parser/create_xml.py index 5b25430..16ce2a7 100644 --- a/create_xml.py +++ b/xml_parser/create_xml.py @@ -1,5 +1,8 @@ from xml.etree import ElementTree as ET from xml.dom import minidom +import os +import json +from pathlib import Path def create_xml_file(book_dict, book_metadata): @@ -50,14 +53,40 @@ def create_xml_file(book_dict, book_metadata): # tree = ET.ElementTree(book_root) # tree.write(filename) + root_dir = os.path.dirname(os.path.dirname(__file__)) + output_dir = os.path.join(root_dir, "xml_files") filename = book_root.get('id') + "_" + lang.text + ".xml" - file = open(filename, 'w') + file = open(output_dir + '/' + filename, 'w') + file_path = file.name + print('XML File Path :: ', file_path) file.write(prettify(book_root)) + file.close() + json_obj = {} + json_obj['book_id'] = book_root.get('id') + json_obj['xml_file'] = filename + json_obj['lang'] = lang.text + json_obj['xml_file_path'] = file_path + json_obj['is_validated'] = False + json_obj['is_saved_to_db'] = False + add_xml_book_data_to_json(json_obj) -def prettify(element): +def add_xml_book_data_to_json(json_obj): + json_file_path = Path('json/books.json') + + json_file = open(json_file_path, 'r') + json_data = json.load(json_file) + json_file.close() + + json_file = open(json_file_path, 'w') + json_data['books'].append(json_obj) + json_file.write(json.dumps(json_data, indent=4)) + json_file.close() + + +def prettify(root): """ Return a pretty-printed XML string for the Element. - """ - rough_string = ET.tostring(element, 'utf-8') + """ + rough_string = ET.tostring(root, 'utf-8') parsed = minidom.parseString(rough_string) - return parsed.toprettyxml(indent=" ") + return parsed.toprettyxml(indent="\t") diff --git a/test.py b/xml_parser/test.py similarity index 69% rename from test.py rename to xml_parser/test.py index 02ee0b3..40a0f6f 100644 --- a/test.py +++ b/xml_parser/test.py @@ -1,6 +1,6 @@ from csv2df import get_book_content, get_book_metadata -from create_xml import create_xml_file +from xml_parser.create_xml import create_xml_file create_xml_file(get_book_content(), get_book_metadata())