From 1b51b22fdcc2a66744521598b941dc4d12982ae6 Mon Sep 17 00:00:00 2001
From: Pavan Mandava <mspavan04@gmail.com>
Date: Thu, 23 Jan 2020 20:08:37 +0100
Subject: [PATCH] Added csv read/write and TODOs to run.py, added README files
 for json and xml_files folders

---
 json/README.md          |  2 ++
 run.py                  | 53 ++++++++++++++++++++++++++++++++++++++++-
 txt_parser/csv_utils.py |  4 ++--
 txt_parser/test_txt.py  |  4 ++--
 xml_files/README.md     |  2 ++
 5 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 json/README.md
 create mode 100644 xml_files/README.md
diff --git a/json/README.md b/json/README.md
new file mode 100644
index 0000000..f75e193
--- /dev/null
+++ b/json/README.md
@@ -0,0 +1,2 @@
+This folder should only be used to store **books.json**. <br/>
+Don't edit the JSON by hand; the code handles everything.
\ No newline at end of file
diff --git a/run.py b/run.py
index f6d4028..02402c9 100644
--- a/run.py
+++ b/run.py
@@ -5,6 +5,7 @@ import utils.json_utils as json_utils
 import utils.constants as const
 import utils.env_utils as env
 import xml_parser.create_xml as create_xml
+import txt_parser.csv_utils as csv_utils
 from csv2df import get_book_content, get_book_metadata
 
 
@@ -35,11 +36,61 @@ def save_validated_files_to_db():
     json_utils.write_json_file(const.JSON_PATH, json_data)
 
 
+def read_data_files_and_align_sentences(book_code):
+    books_list = csv_utils.read_books_csv_file(const.CSV_FILE)
+    books_dict = {}
+    for book in books_list:
+        if book[1] not in books_dict:
+            books_dict[book[1]] = []
+        books_dict[book[1]].append(book)
+
+    if book_code in books_dict:
+        book_code_list = books_dict[book_code]
+
+        for book in book_code_list:
+            book_lines = csv_utils.read_data_file(book[3].strip())
+            # TODO (for Jassi) :: Parse 'book_lines' into chapters and return the result as a dictionary
+            # TODO :: Please follow the dictionary structure shown below;
+            # Isaac will later use this dict structure to align sentences
+            # book_dict = {
+            #     'meta_data': {
+            #         "book_id": "",
+            #         "title": "",
+            #         "lang": "",
+            #         "isTranslation": "",
+            #         "totalChapters": "",
+            #         "authors": [
+            #             {
+            #                 "name": "",
+            #                 "translator": ""
+            #             },
+            #             {
+            #                 "name": ""
+            #             }
+            #         ],
+            #         "description": "", # Optional
+            #         "source": ""
+            #     },
+            #     'content' : [
+            #         {
+            #             'chapter_num': '',
+            #             'chapter_name': '',
+            #             'text_content': ''
+            #         },
+            #         {
+            #             'chapter_num': '',
+            #             'chapter_name': '',
+            #             'text_content': ''
+            #         }
+            #     ]
+            # }
+
+
 def create_xml_file(book_content_dict, book_metadata_dict):
     create_xml.create_xml_file(book_content_dict, book_metadata_dict)
 
 
 if env.check_env_variables():
-    create_xml_file(get_book_content(), get_book_metadata())
+    read_data_files_and_align_sentences('dost_cap')
     # validate_all_xml_files()
     # save_validated_files_to_db()
diff --git a/txt_parser/csv_utils.py b/txt_parser/csv_utils.py
index acacf4a..7e1353d 100644
--- a/txt_parser/csv_utils.py
+++ b/txt_parser/csv_utils.py
@@ -29,8 +29,8 @@ def write_books_data_to_csv(csv_file_name, books_list):
             writer.writerow(book)
 
 
-def read_data_file(file_path):
-    txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_path
+def read_data_file(file_name):
+    txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_name
     with open(txt_file_path, 'r') as file:
         lines = file.readline()
         file.close()
diff --git a/txt_parser/test_txt.py b/txt_parser/test_txt.py
index b4a27e5..a9f45a8 100644
--- a/txt_parser/test_txt.py
+++ b/txt_parser/test_txt.py
@@ -5,6 +5,6 @@ books_list = read_csv.read_books_csv_file(const.CSV_FILE)
 
 for book in books_list:
     print(book)
-    print(type(book))
 
-read_csv.write_books_data_to_csv(const.CSV_FILE, books_list)
\ No newline at end of file
+
+# read_csv.write_books_data_to_csv(const.CSV_FILE, books_list)
diff --git a/xml_files/README.md b/xml_files/README.md
new file mode 100644
index 0000000..62b82ec
--- /dev/null
+++ b/xml_files/README.md
@@ -0,0 +1,2 @@
+This folder should only be used to store **XML files**. <br/>
+Don't edit any XML file by hand.
\ No newline at end of file