You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

43 lines
1.4 KiB

import xml.etree.ElementTree as ET
def parse_xml_file(full_path):
book_dict = {}
tree = ET.parse(full_path)
book_root = tree.getroot()
# print('Root Element :: ', book_root.tag, ' | Attributes :: ', book_root.attrib)
book_dict['code'] = book_root.attrib['code']
book_info_dict = {}
book_content_dict = {}
book_info_element = book_root.find('bookInfo')
book_content_element = book_root.find('content')
book_info_dict['authors'] = []
for child in book_info_element:
if 'author' == child.tag:
author = {'name': child.text}
if 'translator' in child.attrib:
author['translator'] = child.attrib['translator']
book_info_dict['authors'].append(author)
else:
book_info_dict[child.tag] = child.text
book_dict['bookInfo'] = book_info_dict
book_content_dict['chapters'] = []
for chapter in book_content_element:
chapter_dict = {'num': chapter.attrib['num']}
if 'name' in chapter.attrib:
chapter_dict['name'] = chapter.attrib['name']
chapter_dict['sentences'] = {}
for sentence in chapter.findall('sentence'):
chapter_dict['sentences'][sentence.attrib['num']] = sentence.text
book_content_dict['chapters'].append(chapter_dict)
book_dict['content'] = book_content_dict
return book_dict