parent
46f2e93e9a
commit
2e2353b545
@ -0,0 +1,63 @@
|
|||||||
|
from xml.etree import ElementTree as ET
|
||||||
|
from xml.dom import minidom
|
||||||
|
|
||||||
|
|
||||||
|
def create_xml_file(book_dict, book_metadata):
    """Serialize a book to a pretty-printed XML file in the working directory.

    The document has a ``<book id=...>`` root with two children:
    ``<bookInfo>`` (metadata) and ``<content>`` (chapters and sentences).
    The file is named ``<book_id>_<lang>.xml``.

    Args:
        book_dict: Mapping of chapter id -> list of sentence strings.
            Chapters are emitted in the mapping's iteration order. Sentence
            ids are assigned from list position (starting at 1), not taken
            from the input data.
        book_metadata: Dict with required keys ``book_id``, ``title``,
            ``lang``, ``isTranslation``, ``totalChapters`` and ``authors``
            (a list of dicts with ``name`` and an optional ``translator``),
            plus optional keys ``description``, ``source`` and ``isbn``.
            Values are written as element text / attributes as-is, so they
            are expected to be strings.

    Raises:
        KeyError: If a required metadata key is missing.
    """
    book_root = ET.Element('book')
    book_root.set('id', book_metadata['book_id'])

    book_info = ET.SubElement(book_root, 'bookInfo')
    content = ET.SubElement(book_root, 'content')

    # Required fields first, then the optional ones; the tuple order is the
    # order in which the elements appear inside <bookInfo>.
    for tag in ('title', 'lang', 'isTranslation', 'totalChapters'):
        ET.SubElement(book_info, tag).text = book_metadata[tag]

    for tag in ('description', 'source', 'isbn'):
        if tag in book_metadata:
            ET.SubElement(book_info, tag).text = book_metadata[tag]

    for auth in book_metadata['authors']:
        author = ET.SubElement(book_info, 'author')
        author.text = auth['name']
        if 'translator' in auth:
            author.set('translator', auth['translator'])

    for chapter_id, sentences in book_dict.items():
        chapter = ET.SubElement(content, 'chapter')
        chapter.set('id', str(chapter_id))
        for sentence_no, text in enumerate(sentences, start=1):
            sentence = ET.SubElement(chapter, 'sentence')
            sentence.set('id', str(sentence_no))
            sentence.text = text

    filename = book_metadata['book_id'] + "_" + book_metadata['lang'] + ".xml"

    # The pretty-printed string carries an XML declaration without an
    # encoding, which readers interpret as UTF-8 -- so write UTF-8 explicitly
    # instead of the platform default. The context manager guarantees the
    # file is flushed and closed.
    with open(filename, 'w', encoding='utf-8') as xml_file:
        xml_file.write(prettify(book_root))
|
||||||
|
|
||||||
|
|
||||||
|
def prettify(element):
    """Return a pretty-printed XML string for the Element.

    The element is serialized to UTF-8 bytes with ElementTree, re-parsed
    with minidom, and rendered with one level of indentation per nesting
    depth. The result is a ``str`` that starts with an XML declaration.
    """
    rough_string = ET.tostring(element, 'utf-8')
    # ElementTree has no built-in pretty printer that returns a string here,
    # so round-trip through minidom purely for formatting.
    return minidom.parseString(rough_string).toprettyxml(indent=" ")
|
||||||
@ -1,3 +1,46 @@
|
|||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
df = pd.read_csv("test_example.csv", header=None).rename(
|
|
||||||
columns={0:'chapter', 1:'sentence', 2:'text'})
|
|
||||||
|
def get_book_content(csv_path="test_example.csv"):
    """Load book sentences from a header-less CSV, grouped by chapter.

    The CSV's first three columns are read as chapter, sentence and text.
    Only the chapter and text columns are used for grouping; sentences are
    kept in file order within each chapter.
    NOTE(review): this presumes rows are already sorted by sentence number
    within a chapter -- confirm against the data producer.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``"test_example.csv"`` in the working directory.

    Returns:
        OrderedDict mapping each chapter id (in order of first appearance)
        to the list of its sentence texts.
    """
    df = pd.read_csv(csv_path, header=None).rename(
        columns={0: 'chapter', 1: 'sentence', 2: 'text'})

    book_dict = OrderedDict()
    # Column-wise iteration avoids building a Series per row (iterrows).
    for ch_id, text in zip(df['chapter'], df['text']):
        book_dict.setdefault(ch_id, []).append(text)

    return book_dict
|
||||||
|
|
||||||
|
|
||||||
|
def get_book_metadata():
    """Return hard-coded sample metadata for one book.

    The dict carries the string fields ``book_id``, ``title``, ``lang``,
    ``isTranslation``, ``totalChapters``, ``description`` and ``source``,
    plus ``authors``: a list of dicts with a ``name`` and an optional
    ``translator`` flag. All values are strings. A fresh dict is built on
    every call, so callers may mutate the result freely.
    """
    return {
        "book_id": "abcdef",
        "title": "Bullshit",
        "lang": "en",
        "isTranslation": "true",
        "totalChapters": "2",
        "authors": [
            {
                "name": "Herr Riley",
                "translator": "true",
            },
            {
                "name": "Herr Singh",
            },
        ],
        "description": "Some Random Bullshit description",
        "source": "https://www.idontcare.com",
    }
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
google-cloud-translate==2.0.0
|
google-cloud-translate==2.0.0
|
||||||
google-cloud-storage==1.19.1
|
google-cloud-storage==1.19.1
|
||||||
mysql-connector-python==8.0.19
|
mysql-connector-python==8.0.19
|
||||||
|
pandas
|
||||||
Loading…
Reference in new issue