diff --git a/json/README.md b/json/README.md
new file mode 100644
index 0000000..f75e193
--- /dev/null
+++ b/json/README.md
@@ -0,0 +1,2 @@
+This folder should only be used to store **books.json**.
+Don't edit the JSON by hand; the code handles everything.
\ No newline at end of file
diff --git a/run.py b/run.py
index f6d4028..02402c9 100644
--- a/run.py
+++ b/run.py
@@ -5,6 +5,7 @@ import utils.json_utils as json_utils
import utils.constants as const
import utils.env_utils as env
import xml_parser.create_xml as create_xml
+import txt_parser.csv_utils as csv_utils
from csv2df import get_book_content, get_book_metadata
@@ -35,11 +36,61 @@ def save_validated_files_to_db():
json_utils.write_json_file(const.JSON_PATH, json_data)
+def read_data_files_and_align_sentences(book_code):
+ books_list = csv_utils.read_books_csv_file(const.CSV_FILE)
+ books_dict = {}
+ for book in books_list:
+ if book[1] not in books_dict:
+ books_dict[book[1]] = []
+ books_dict[book[1]].append(book)
+
+ if book_code in books_dict:
+ book_code_list = books_dict[book_code]
+
+ for book in book_code_list:
+ book_lines = csv_utils.read_data_file(book[3].strip())
+ # TODO(Jassi): parse 'book_lines' into chapters and return a dictionary
+ # that follows the structure below.
+ # Isaac will later use this dict structure to align sentences.
+ # book_dict = {
+ # 'meta_data': {
+ # "book_id": "",
+ # "title": "",
+ # "lang": "",
+ # "isTranslation": "",
+ # "totalChapters": "",
+ # "authors": [
+ # {
+ # "name": "",
+ # "translator": ""
+ # },
+ # {
+ # "name": ""
+ # }
+ # ],
+ # "description": "", # Optional
+ # "source": ""
+ # },
+ # 'content' : [
+ # {
+ # 'chapter_num': '',
+ # 'chapter_name': '',
+ # 'text_content': ''
+ # },
+ # {
+ # 'chapter_num': '',
+ # 'chapter_name': '',
+ # 'text_content': ''
+ # }
+ # ]
+ # }
+
+
def create_xml_file(book_content_dict, book_metadata_dict):
create_xml.create_xml_file(book_content_dict, book_metadata_dict)
if env.check_env_variables():
- create_xml_file(get_book_content(), get_book_metadata())
+ read_data_files_and_align_sentences('dost_cap')
# validate_all_xml_files()
# save_validated_files_to_db()
diff --git a/txt_parser/csv_utils.py b/txt_parser/csv_utils.py
index acacf4a..7e1353d 100644
--- a/txt_parser/csv_utils.py
+++ b/txt_parser/csv_utils.py
@@ -29,8 +29,8 @@ def write_books_data_to_csv(csv_file_name, books_list):
writer.writerow(book)
-def read_data_file(file_path):
- txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_path
+def read_data_file(file_name):
+ txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_name
with open(txt_file_path, 'r') as file:
lines = file.readline()
file.close()
diff --git a/txt_parser/test_txt.py b/txt_parser/test_txt.py
index b4a27e5..a9f45a8 100644
--- a/txt_parser/test_txt.py
+++ b/txt_parser/test_txt.py
@@ -5,6 +5,6 @@ books_list = read_csv.read_books_csv_file(const.CSV_FILE)
for book in books_list:
print(book)
- print(type(book))
-read_csv.write_books_data_to_csv(const.CSV_FILE, books_list)
\ No newline at end of file
+
+# read_csv.write_books_data_to_csv(const.CSV_FILE, books_list)
diff --git a/xml_files/README.md b/xml_files/README.md
new file mode 100644
index 0000000..62b82ec
--- /dev/null
+++ b/xml_files/README.md
@@ -0,0 +1,2 @@
+This folder should only be used to store **XML files**.
+Don't edit any XML file by hand.
\ No newline at end of file