DB Schema Updated,

XML Read File Added,
DB Connection code added
master
Pavan Mandava 6 years ago
parent f0a718d6b4
commit dbd17ad8a7

@ -0,0 +1,23 @@
import mysql.connector
from mysql.connector import errorcode
import db.read_config as config
def get_new_mysql_connection():
    """Open a new MySQL connection from the ``mysql`` section of the config file.

    The connection parameters are read from ``../db_config.ini`` through
    ``config.read_db_config`` and passed as keyword arguments to
    ``mysql.connector.connect``.

    :return: the connection object, or ``None`` when ``mysql.connector.Error``
             was raised while connecting (a message is printed in that case)
    """
    db_config = config.read_db_config('../db_config.ini', 'mysql')
    try:
        return mysql.connector.connect(**db_config)
    except mysql.connector.Error as err:
        # Friendly text for the two failures we recognise; any other error
        # is printed as-is.
        friendly_messages = {
            errorcode.ER_ACCESS_DENIED_ERROR: 'Invalid Database User and Password',
            errorcode.ER_BAD_DB_ERROR: 'Database doesn\'t exist ',
        }
        print(friendly_messages.get(err.errno, err))
        return None

@ -0,0 +1,28 @@
from configparser import ConfigParser
import os
def read_db_config(filename, section):
    """ Read database configuration file and return a dictionary object

    The value of the ``password`` option is treated as the *name* of an
    environment variable. When that variable is set, its value replaces the
    option in the returned dictionary. When it is not set, a hint is printed
    and the configured name is left in place. A section without a
    ``password`` option is returned unchanged.

    :param filename: name of the configuration file
    :param section: section of database configuration
    :return: a dictionary of database parameters
    :raises Exception: if ``section`` is not present in ``filename`` (this
        includes an unreadable/missing file, which ``ConfigParser.read``
        skips silently)
    """
    parser = ConfigParser()
    parser.read(filename)

    if not parser.has_section(section):
        raise Exception('{0} not found in the {1} file'.format(section, filename))

    db = dict(parser.items(section))

    # Look the option up once. Indexing db['password'] inside the KeyError
    # handler would raise a second KeyError when the option is absent.
    env_var_name = db.get('password')
    if env_var_name is None:
        return db

    try:
        db['password'] = os.environ[env_var_name]
    except KeyError:
        print('Please set the Environment Variable ', env_var_name)
    return db

@ -0,0 +1,9 @@
import db.mysql_connection as connection
# Manual smoke test: open a connection, print its charset and connectivity,
# then close it.
conn = connection.get_new_mysql_connection()
if conn is None:
    # get_new_mysql_connection() returns None (after printing the reason)
    # when connecting fails; accessing attributes on it would raise
    # AttributeError.
    print('isConnected :: ', False)
else:
    try:
        print(conn.charset)
        print('isConnected :: ', conn.is_connected())
    finally:
        # Release the connection even if one of the checks above raises.
        conn.close()

@ -0,0 +1,6 @@
[mysql]
host = 127.0.0.1
port = 3306
database = bitext-aligner
user = root
; Not the password itself: read_db_config() uses this value as the name of an
; environment variable and substitutes that variable's value when it is set.
password = MYSQL_PASSWORD

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

After

Width:  |  Height:  |  Size: 1.4 MiB

@ -1,5 +1,5 @@
-- MySQL Script generated by MySQL Workbench -- MySQL Script generated by MySQL Workbench
-- Thu Jan 16 23:41:59 2020 -- Sat Jan 18 00:33:33 2020
-- Model: New Model Version: 1.0 -- Model: New Model Version: 1.0
-- MySQL Workbench Forward Engineering -- MySQL Workbench Forward Engineering
@ -35,6 +35,7 @@ ENGINE = InnoDB;
CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book` ( CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book` (
`id` INT NOT NULL, `id` INT NOT NULL,
`code` VARCHAR(90) NOT NULL, `code` VARCHAR(90) NOT NULL,
`added_at` BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`id`)) PRIMARY KEY (`id`))
ENGINE = InnoDB; ENGINE = InnoDB;

@ -3,7 +3,7 @@ import json
json_file_path = Path('json/books.json') json_file_path = Path('json/books.json')
json_data = {'books': []} json_data = {'books': {}}
if not json_file_path.is_file(): if not json_file_path.is_file():
json_file = open(json_file_path, 'w') json_file = open(json_file_path, 'w')
json_file.write(json.dumps(json_data, indent=4)) json_file.write(json.dumps(json_data, indent=4))

@ -48,38 +48,45 @@ def create_xml_file(book_dict, book_metadata):
chapter.set('num', str(key)) chapter.set('num', str(key))
for idx, val in enumerate(book_dict[key]): for idx, val in enumerate(book_dict[key]):
sentence = ET.SubElement(chapter, 'sentence') sentence = ET.SubElement(chapter, 'sentence')
sentence.set('id', str(idx + 1)) sentence.set('num', str(idx + 1))
sentence.text = val sentence.text = val
# tree = ET.ElementTree(book_root) # tree = ET.ElementTree(book_root)
# tree.write(filename) # tree.write(filename)
root_dir = os.path.dirname(os.path.dirname(__file__)) root_dir = os.path.dirname(os.path.dirname(__file__))
output_dir = os.path.join(root_dir, "xml_files") output_dir = os.path.join(root_dir, "xml_files")
filename = book_root.get('id') + "_" + lang.text + ".xml" filename = book_root.get('code') + "_" + lang.text + ".xml"
file = open(output_dir + '/' + filename, 'w') file = open(output_dir + '/' + filename, 'w')
file_path = file.name file_path = file.name
print('XML File Path :: ', file_path) print('XML File Path :: ', file_path)
file.write(prettify(book_root)) file.write(prettify(book_root))
file.close() file.close()
json_obj = {} json_obj = {}
json_obj['book_id'] = book_root.get('id') bbok_code = book_root.get('code')
json_obj['xml_file'] = filename json_obj['xml_file'] = filename
json_obj['lang'] = lang.text json_obj['lang'] = lang.text
json_obj['xml_file_path'] = file_path json_obj['xml_file_path'] = file_path
json_obj['is_validated'] = False json_obj['is_validated'] = False
json_obj['is_saved_to_db'] = False json_obj['is_saved_to_db'] = False
add_xml_book_data_to_json(json_obj) add_xml_book_data_to_json(bbok_code, json_obj)
def add_xml_book_data_to_json(book_code, json_obj):
    """Append one XML-file record to the registry in ``json/books.json``.

    The registry's ``books`` entry maps a book code to a list of records;
    ``json_obj`` is appended to the list for ``book_code``, creating the
    list when the code is not present yet.

    :param book_code: key under ``books`` that the record belongs to
    :param json_obj: JSON-serializable record to append
    :raises KeyError: if the registry has no ``books`` entry
    :raises TypeError: if the updated registry cannot be serialized; the
        file on disk is left untouched in that case
    """
    json_file_path = Path('json/books.json')
    with open(json_file_path, 'r') as json_file:
        json_data = json.load(json_file)

    json_data['books'].setdefault(book_code, []).append(json_obj)

    # Serialize before opening for writing: mode 'w' truncates the file, so
    # a serialization error after the open would wipe the existing registry.
    serialized = json.dumps(json_data, indent=4)
    with open(json_file_path, 'w') as json_file:
        json_file.write(serialized)

@ -0,0 +1,42 @@
import xml.etree.ElementTree as ET
def parse_xml_file(full_path):
    """Parse a book XML file into a nested dictionary.

    The result has three keys:

    * ``code`` - the ``code`` attribute of the root element
    * ``bookInfo`` - one entry per child of ``<bookInfo>`` (tag -> text),
      except ``<author>`` children, which are collected under ``authors``
      as ``{'name': text}`` plus ``translator`` when that attribute is set
    * ``content`` - ``{'chapters': [...]}``, one dict per child of
      ``<content>`` holding its ``num``, optional ``name`` and a
      ``sentences`` mapping of sentence ``num`` -> text

    :param full_path: path of the XML file to parse
    :return: dictionary describing the book
    """
    root = ET.parse(full_path).getroot()
    parsed = {'code': root.attrib['code']}

    info_node = root.find('bookInfo')
    content_node = root.find('content')

    info = {'authors': []}
    for node in info_node:
        if node.tag != 'author':
            info[node.tag] = node.text
            continue
        author_entry = {'name': node.text}
        if 'translator' in node.attrib:
            author_entry['translator'] = node.attrib['translator']
        info['authors'].append(author_entry)
    parsed['bookInfo'] = info

    chapters = []
    for chapter_node in content_node:
        chapter_entry = {'num': chapter_node.attrib['num']}
        if 'name' in chapter_node.attrib:
            chapter_entry['name'] = chapter_node.attrib['name']
        chapter_entry['sentences'] = {
            sentence_node.attrib['num']: sentence_node.text
            for sentence_node in chapter_node.findall('sentence')
        }
        chapters.append(chapter_entry)
    parsed['content'] = {'chapters': chapters}

    return parsed

@ -1,6 +1,11 @@
from csv2df import get_book_content, get_book_metadata from csv2df import get_book_content, get_book_metadata
import xml_parser.create_xml as create_xml
import xml_parser.read_xml as read_xml
from xml_parser.create_xml import create_xml_file create_xml.create_xml_file(get_book_content(), get_book_metadata())
create_xml_file(get_book_content(), get_book_metadata())
book_dict = read_xml.parse_xml_file('/Users/pavanmandava/PythonWorkspace/bitext-aligner/xml_files/abcdef_en.xml')
print(book_dict)

Loading…
Cancel
Save