Added csv read/write to run.py,

TODOs added
README files for json and xml_files folder
master
Pavan Mandava 6 years ago
parent 10c77c51c0
commit 1b51b22fdc

@ -0,0 +1,2 @@
This folder should only be used to store **books.json**. <br/>
Don't edit the JSON by hand; the code handles everything.

@ -5,6 +5,7 @@ import utils.json_utils as json_utils
import utils.constants as const import utils.constants as const
import utils.env_utils as env import utils.env_utils as env
import xml_parser.create_xml as create_xml import xml_parser.create_xml as create_xml
import txt_parser.csv_utils as csv_utils
from csv2df import get_book_content, get_book_metadata from csv2df import get_book_content, get_book_metadata
@ -35,11 +36,61 @@ def save_validated_files_to_db():
json_utils.write_json_file(const.JSON_PATH, json_data) json_utils.write_json_file(const.JSON_PATH, json_data)
def read_data_files_and_align_sentences(book_code):
    """Read the data file of every CSV row registered under ``book_code``.

    The rows returned by ``csv_utils.read_books_csv_file(const.CSV_FILE)``
    are grouped by the value in column index 1. For each row in the group
    matching ``book_code``, the value in column index 3 is stripped and
    passed to ``csv_utils.read_data_file``.

    Work in progress: the loaded lines are not parsed or returned yet (see
    the TODO notes below), so the function currently returns ``None``.
    """
    # Group every CSV row under the value of its second column.
    rows_by_code = {}
    for row in csv_utils.read_books_csv_file(const.CSV_FILE):
        rows_by_code.setdefault(row[1], []).append(row)

    # An unknown book_code yields an empty group, so nothing is read.
    for row in rows_by_code.get(book_code, []):
        book_lines = csv_utils.read_data_file(row[3].strip())
        # TODO (for Jassi) :: Take this 'book_lines' and return dictionary after parsing chapters
        # TODO :: Please Follow the below Dictionary Structure, ==
        # Later Isaac will use this dict structure to align sentences
        # book_dict = {
        #     'meta_data': {
        #         "book_id": "",
        #         "title": "",
        #         "lang": "",
        #         "isTranslation": "",
        #         "totalChapters": "",
        #         "authors": [
        #             {
        #                 "name": "",
        #                 "translator": ""
        #             },
        #             {
        #                 "name": ""
        #             }
        #         ],
        #         "description": "",  # Optional
        #         "source": ""
        #     },
        #     'content': [
        #         {
        #             'chapter_num': '',
        #             'chapter_name': '',
        #             'text_content': ''
        #         },
        #         {
        #             'chapter_num': '',
        #             'chapter_name': '',
        #             'text_content': ''
        #         }
        #     ]
        # }
def create_xml_file(book_content_dict, book_metadata_dict): def create_xml_file(book_content_dict, book_metadata_dict):
create_xml.create_xml_file(book_content_dict, book_metadata_dict) create_xml.create_xml_file(book_content_dict, book_metadata_dict)
if env.check_env_variables(): if env.check_env_variables():
create_xml_file(get_book_content(), get_book_metadata()) read_data_files_and_align_sentences('dost_cap')
# validate_all_xml_files() # validate_all_xml_files()
# save_validated_files_to_db() # save_validated_files_to_db()

@ -29,8 +29,8 @@ def write_books_data_to_csv(csv_file_name, books_list):
writer.writerow(book) writer.writerow(book)
def read_data_file(file_path): def read_data_file(file_name):
txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_path txt_file_path = os.path.dirname(os.path.dirname(__file__)) + const.DATA_FOLDER + file_name
with open(txt_file_path, 'r') as file: with open(txt_file_path, 'r') as file:
lines = file.readline() lines = file.readline()
file.close() file.close()

@ -5,6 +5,6 @@ books_list = read_csv.read_books_csv_file(const.CSV_FILE)
for book in books_list: for book in books_list:
print(book) print(book)
print(type(book))
read_csv.write_books_data_to_csv(const.CSV_FILE, books_list)
# read_csv.write_books_data_to_csv(const.CSV_FILE, books_list)

@ -0,0 +1,2 @@
This folder should only be used to store **XML files**. <br/>
Don't edit any XML file by hand.
Loading…
Cancel
Save