From ea24fa832521facc070a7eab122b9ecf7b055769 Mon Sep 17 00:00:00 2001
From: Pavan Mandava <mspavan04@gmail.com>
Date: Thu, 16 Jan 2020 01:03:11 +0100
Subject: [PATCH] Add a JSON registry of generated XML book files and move
 the XML code into the xml_parser package

---
 csv2df.py                                 |  2 +-
 {xml => xml_files}/book_structure.xml     |  0
 xml_parser/__init__.py                    | 12 +++++++
 create_xml.py => xml_parser/create_xml.py | 39 ++++++++++++++++++++---
 test.py => xml_parser/test.py             |  2 +-
 5 files changed, 48 insertions(+), 7 deletions(-)
 rename {xml => xml_files}/book_structure.xml (100%)
 create mode 100644 xml_parser/__init__.py
 rename create_xml.py => xml_parser/create_xml.py (65%)
 rename test.py => xml_parser/test.py (69%)

diff --git a/csv2df.py b/csv2df.py
index 43a2518..d3fcd66 100644
--- a/csv2df.py
+++ b/csv2df.py
@@ -13,7 +13,7 @@ def get_book_content():
         ch_id = row['chapter']
         s_id = row['sentence']
         text = row['text']
-        print(ch_id, " -> ", s_id, " -> ", text)
+        # print(ch_id, " -> ", s_id, " -> ", text)
 
         if ch_id not in book_dict:
             book_dict[ch_id] = []
diff --git a/xml/book_structure.xml b/xml_files/book_structure.xml
similarity index 100%
rename from xml/book_structure.xml
rename to xml_files/book_structure.xml
diff --git a/xml_parser/__init__.py b/xml_parser/__init__.py
new file mode 100644
index 0000000..4677764
--- /dev/null
+++ b/xml_parser/__init__.py
@@ -0,0 +1,12 @@
+"""Create json/books.json with an empty 'books' list when it is missing."""
+from pathlib import Path
+import json
+
+json_file_path = Path('json/books.json')
+json_data = {'books': []}
+if not json_file_path.is_file():
+    # open() cannot create missing directories, so make json/ first.
+    json_file_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(json_file_path, 'w') as json_file:
+        json_file.write(json.dumps(json_data, indent=4))
+    print('JSON File Created :: '+json_file.name)
diff --git a/create_xml.py b/xml_parser/create_xml.py
similarity index 65%
rename from create_xml.py
rename to xml_parser/create_xml.py
index 5b25430..16ce2a7 100644
--- a/create_xml.py
+++ b/xml_parser/create_xml.py
@@ -1,5 +1,8 @@
 from xml.etree import ElementTree as ET
 from xml.dom import minidom
+import os
+import json
+from pathlib import Path
 
 
 def create_xml_file(book_dict, book_metadata):
@@ -50,14 +53,40 @@ def create_xml_file(book_dict, book_metadata):
 
     # tree = ET.ElementTree(book_root)
     # tree.write(filename)
+    root_dir = os.path.dirname(os.path.dirname(__file__))
+    output_dir = os.path.join(root_dir, "xml_files")
     filename = book_root.get('id') + "_" + lang.text + ".xml"
-    file = open(filename, 'w')
+    file = open(output_dir + '/' + filename, 'w')
+    file_path = file.name
+    print('XML File Path :: ', file_path)
     file.write(prettify(book_root))
+    file.close()
+    json_obj = {}
+    json_obj['book_id'] = book_root.get('id')
+    json_obj['xml_file'] = filename
+    json_obj['lang'] = lang.text
+    json_obj['xml_file_path'] = file_path
+    json_obj['is_validated'] = False
+    json_obj['is_saved_to_db'] = False
+    add_xml_book_data_to_json(json_obj)
 
 
-def prettify(element):
+def add_xml_book_data_to_json(json_obj):
+    """Append ``json_obj`` to the 'books' list stored in json/books.json."""
+    json_file_path = Path('json/books.json')
+    with open(json_file_path, 'r') as json_file:
+        json_data = json.load(json_file)
+    json_data['books'].append(json_obj)
+    # Serialize before reopening: mode 'w' truncates the file immediately, so
+    # a dumps() failure must not be able to wipe the existing entries.
+    serialized = json.dumps(json_data, indent=4)
+    with open(json_file_path, 'w') as json_file:
+        json_file.write(serialized)
+
+
+def prettify(root):
-    """ Return a pretty-printed XML string for the Element.
-        """
-    rough_string = ET.tostring(element, 'utf-8')
-    parsed = minidom.parseString(rough_string)
-    return parsed.toprettyxml(indent="    ")
+    """Render ``root`` as pretty-printed XML text, indented with tabs.
+    """
+    xml_bytes = ET.tostring(root, 'utf-8')
+    document = minidom.parseString(xml_bytes)
+    return document.toprettyxml(indent="\t")
diff --git a/test.py b/xml_parser/test.py
similarity index 69%
rename from test.py
rename to xml_parser/test.py
index 02ee0b3..40a0f6f 100644
--- a/test.py
+++ b/xml_parser/test.py
@@ -1,6 +1,6 @@
 from csv2df import get_book_content, get_book_metadata
 
-from create_xml import create_xml_file
+from xml_parser.create_xml import create_xml_file
 
 create_xml_file(get_book_content(), get_book_metadata())