You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

97 lines
3.5 KiB

import xml_parser.read_xml as read_xml
import db.add_book as adb
import xml_parser.validate as validate
import utils.json_utils as json_utils
import utils.constants as const
import utils.env_utils as env
import xml_parser.create_xml as create_xml
import txt_parser.csv_utils as csv_utils
from csv2df import get_book_content, get_book_metadata
def validate_all_xml_files():
    """Run the XML validation step for all files.

    Delegates to ``validate.validate_all_xml_files`` in ``xml_parser.validate``;
    the delegate's return value is intentionally not propagated.
    """
    validate.validate_all_xml_files()
def save_validated_files_to_db():
    """Push validated, not-yet-saved books from the JSON registry to the DB.

    Reads the registry at ``const.JSON_PATH`` and walks every entry under its
    ``'books'`` key (a mapping of book code -> list of book entries):

    * entries whose ``'is_validated'`` flag is falsy are reported and skipped;
    * entries whose ``'is_saved_to_db'`` flag is falsy are parsed with
      ``read_xml.parse_xml_file`` and handed to ``adb.add_book_to_db``; the
      value returned by that call is stored back in ``'is_saved_to_db'``.

    The (possibly updated) registry is then written back to ``const.JSON_PATH``.
    """
    registry = json_utils.read_json_file(const.JSON_PATH)
    books_by_code = registry['books']

    for book_code, entries in books_by_code.items():
        for entry in entries:
            # Only files that passed XSD validation may be stored.
            if not entry['is_validated']:
                print(const.WARNING, 'XML File :: ', entry['xml_file'], ' is not validated against XSD', const.END)
                continue

            # Already stored on a previous run: nothing to do.
            if entry['is_saved_to_db']:
                continue

            print(const.BLUE, 'Adding Book Data : ', entry['xml_file'], ' to the DB', const.END)
            parsed_book = read_xml.parse_xml_file(entry['xml_file_path'])
            result = adb.add_book_to_db(book_code, parsed_book)
            entry['is_saved_to_db'] = result

            # Colour the outcome line: BLUE for a truthy result, WARNING otherwise.
            colour = const.BLUE if result else const.WARNING
            print(colour, 'Result :: ', result, const.END, '\n')

    # NOTE(review): the registry is assumed to be written back once, after all
    # books have been processed - confirm against the original indentation.
    registry['books'] = books_by_code
    json_utils.write_json_file(const.JSON_PATH, registry)
def read_data_files_and_align_sentences(book_code):
    """Read the data file of every book registered under ``book_code``.

    The rows returned by ``csv_utils.read_books_csv_file(const.CSV_FILE)`` are
    grouped by their second column (index 1), which is compared against
    ``book_code``. For each row in the matching group, the stripped fourth
    column (index 3) is passed to ``csv_utils.read_data_file`` - presumably a
    data-file path; verify against the CSV layout.

    Parsing and sentence alignment are not implemented yet (see the TODO
    below), so the lines that are read are currently discarded and the
    function returns ``None``.
    """
    rows_by_code = {}
    for row in csv_utils.read_books_csv_file(const.CSV_FILE):
        rows_by_code.setdefault(row[1], []).append(row)

    # Unknown book code: nothing to read.
    if book_code not in rows_by_code:
        return

    for row in rows_by_code[book_code]:
        book_lines = csv_utils.read_data_file(row[3].strip())
        # TODO (for Jassi) :: Take this 'book_lines' and return dictionary after parsing chapters
        # TODO :: Please follow the dictionary structure below.
        # Later Isaac will use this dict structure to align sentences.
        # book_dict = {
        #     'meta_data': {
        #         "book_id": "",
        #         "title": "",
        #         "lang": "",
        #         "isTranslation": "",
        #         "totalChapters": "",
        #         "authors": [
        #             {
        #                 "name": "",
        #                 "translator": ""
        #             },
        #             {
        #                 "name": ""
        #             }
        #         ],
        #         "description": "",  # Optional
        #         "source": ""
        #     },
        #     'content': [
        #         {
        #             'chapter_num': '',
        #             'chapter_name': '',
        #             'text_content': ''
        #         },
        #         {
        #             'chapter_num': '',
        #             'chapter_name': '',
        #             'text_content': ''
        #         }
        #     ]
        # }
def create_xml_file(book_content_dict, book_metadata_dict):
    """Create an XML file from a book's content and metadata dictionaries.

    Both arguments are forwarded unchanged to ``create_xml.create_xml_file``;
    the delegate's return value is not propagated.

    Args:
        book_content_dict: Book content, passed through as the first argument.
        book_metadata_dict: Book metadata, passed through as the second argument.
    """
    create_xml.create_xml_file(book_content_dict, book_metadata_dict)
# Script entry: run the pipeline only when the environment check passes.
# Currently only the data-file reading step for the 'dost_cap' book code is
# active; the validation and DB-save steps are kept below, disabled.
if env.check_env_variables():
    read_data_files_and_align_sentences('dost_cap')
    # validate_all_xml_files()
    # save_validated_files_to_db()