Create XML from Python done

master
Pavan Mandava 6 years ago
parent 46f2e93e9a
commit 2e2353b545

@ -0,0 +1,63 @@
from xml.etree import ElementTree as ET
from xml.dom import minidom
def create_xml_file(book_dict, book_metadata):
    """Build a ``<book>`` XML document and write it to ``<book_id>_<lang>.xml``.

    The document has two children under the root ``<book id=...>`` element:
    ``<bookInfo>`` (metadata) and ``<content>`` (chapters and sentences).

    Args:
        book_dict: Mapping of chapter id -> sequence of sentence strings.
            Each key becomes a ``<chapter id=...>`` element and each item a
            ``<sentence id=...>`` element numbered from 1 within its chapter.
        book_metadata: Mapping that must contain ``book_id``, ``title``,
            ``lang``, ``isTranslation``, ``totalChapters`` and ``authors``
            (a list of dicts with a ``name`` key and an optional
            ``translator`` key). ``description``, ``source`` and ``isbn``
            are written only when present.

    Raises:
        KeyError: If a required metadata key is missing.
    """
    book_root = ET.Element('book')
    book_root.set('id', book_metadata['book_id'])

    book_info = ET.SubElement(book_root, 'bookInfo')
    content = ET.SubElement(book_root, 'content')

    # Required metadata: a missing key is an error and raises KeyError.
    for tag in ('title', 'lang', 'isTranslation', 'totalChapters'):
        ET.SubElement(book_info, tag).text = book_metadata[tag]

    # Optional metadata: emitted only when the caller supplied it.
    for tag in ('description', 'source', 'isbn'):
        if tag in book_metadata:
            ET.SubElement(book_info, tag).text = book_metadata[tag]

    for auth in book_metadata['authors']:
        author = ET.SubElement(book_info, 'author')
        author.text = auth['name']
        if 'translator' in auth:
            author.set('translator', auth['translator'])

    for chapter_id, sentences in book_dict.items():
        chapter = ET.SubElement(content, 'chapter')
        chapter.set('id', str(chapter_id))
        for position, text in enumerate(sentences, start=1):
            sentence = ET.SubElement(chapter, 'sentence')
            sentence.set('id', str(position))
            sentence.text = text

    filename = book_metadata['book_id'] + "_" + book_metadata['lang'] + ".xml"
    # Write explicitly as UTF-8 rather than the locale default, so non-ASCII
    # text stays valid XML; the context manager guarantees the handle is
    # flushed and closed.
    with open(filename, 'w', encoding='utf-8') as xml_file:
        xml_file.write(prettify(book_root))
def prettify(element):
    """Render an ElementTree element as an indented XML string.

    The element is serialized to UTF-8 bytes, re-parsed with minidom, and
    pretty-printed using a single space per nesting level.
    """
    return minidom.parseString(
        ET.tostring(element, 'utf-8')
    ).toprettyxml(indent=" ")

@ -1,3 +1,46 @@
from collections import OrderedDict
import pandas as pd import pandas as pd
def get_book_content(csv_path="test_example.csv"):
    """Load a header-less book CSV and group its text by chapter.

    The CSV is read with no header row; its first three columns are treated
    as chapter id, sentence id and sentence text respectively.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``"test_example.csv"``, the file the function originally read.

    Returns:
        An ``OrderedDict`` mapping each chapter id to the list of its
        sentence texts, in the order the rows appear in the file. The
        sentence id column is only echoed to stdout, not stored.
    """
    df = pd.read_csv(csv_path, header=None).rename(
        columns={0: 'chapter', 1: 'sentence', 2: 'text'})

    book_dict = OrderedDict()
    # Iterate the columns in lockstep rather than via iterrows(): each value
    # keeps its own column's dtype instead of being coerced to a common
    # per-row dtype.
    for ch_id, s_id, text in zip(df['chapter'], df['sentence'], df['text']):
        print(ch_id, " -> ", s_id, " -> ", text)
        book_dict.setdefault(ch_id, []).append(text)

    return book_dict
def get_book_metadata():
    """Return a hard-coded sample metadata dict for a two-author book.

    The dict carries the book id, title, language, translation flag,
    chapter count, author list, description and source URL; all values
    are strings except ``authors``, which is a list of dicts.
    """
    authors = [
        {"name": "Herr Riley", "translator": "true"},
        {"name": "Herr Singh"},
    ]
    return {
        "book_id": "abcdef",
        "title": "Bullshit",
        "lang": "en",
        "isTranslation": "true",
        "totalChapters": "2",
        "authors": authors,
        "description": "Some Random Bullshit description",
        "source": "https://www.idontcare.com",
    }

@ -1,3 +1,4 @@
google-cloud-translate==2.0.0
google-cloud-storage==1.19.1
mysql-connector-python==8.0.19
pandas

@ -0,0 +1,6 @@
from csv2df import get_book_content, get_book_metadata
from create_xml import create_xml_file
# Load the chapter/sentence content and the book metadata, then write the
# combined XML document to disk.
book_content = get_book_content()
book_metadata = get_book_metadata()
create_xml_file(book_content, book_metadata)
Loading…
Cancel
Save