"""Insert a parsed book and all of its related rows into the database (db/add_book.py)."""
import time


def add_book_to_db(book_code, book_dict, *, conn=None, queries=None):
    """Store one book (info, authors, content, chapters, sentences) in one transaction.

    The transaction is committed only when every insert/update succeeded;
    otherwise it is rolled back, so a partially stored book is never committed.

    Args:
        book_code: Value written to the ``code`` column of the book row.
        book_dict: Parsed book. Reads ``book_dict['bookInfo']`` (``title``,
            ``lang``, ``source``, ``isTranslation``, ``totalChapters``,
            ``authors`` and the optional ``description`` / ``isbn``) and
            ``book_dict['content']['chapters']`` (each with ``num``,
            ``sentences`` and an optional ``name``).
        conn: Optional already-open connection to reuse. When given it is
            committed or rolled back here but left open for the caller.
            When omitted, a connection is opened from ``'db_config.ini'``
            and closed before returning.
        queries: Optional object exposing the ``*_QUERY`` attributes.
            Defaults to the ``db.constants`` module.

    Returns:
        True if everything was stored and committed. False if the connection
        could not be opened or any statement failed (changes rolled back).

    Raises:
        KeyError: If a required key is missing from ``book_dict``; the
            transaction is rolled back before the error propagates.
    """
    # Project modules are imported lazily, only for defaults the caller did
    # not supply, so the row helpers stay importable without a DB driver.
    if queries is None:
        import db.constants as const
        queries = const

    owns_connection = conn is None
    if owns_connection:
        import db.mysql_connection as mysql
        conn = mysql.get_new_mysql_connection('db_config.ini')
        if conn is None:
            return False

    is_success = False
    try:
        db_cursor = conn.cursor(buffered=True)
        try:
            is_success = _save_book(db_cursor, queries, book_code, book_dict)
        finally:
            db_cursor.close()
    finally:
        # Runs on the exception path too: is_success is still False there,
        # so the partial work is rolled back and the connection released.
        try:
            if is_success:
                conn.commit()
            else:
                conn.rollback()
        finally:
            if owns_connection:
                conn.close()

    return is_success


def _save_book(db_cursor, queries, book_code, book_dict):
    """Insert every row of one book; return True only if all statements succeeded."""
    book_row = {
        'code': book_code,
        'added_at': int(time.time())
    }
    book_id = add_book_row_to_table(db_cursor, queries.BOOK_INSERT_QUERY, book_row)
    print('Book Row Id :: ', book_id)
    if book_id <= 0:
        return False

    book_info_dict = book_dict['bookInfo']
    book_info_id = _save_book_info(db_cursor, queries, book_id, book_info_dict)
    if book_info_id <= 0:
        return False

    if not _save_authors(db_cursor, queries, book_info_id, book_info_dict['authors']):
        return False

    return _save_content(db_cursor, queries, book_id, book_dict)


def _save_book_info(db_cursor, queries, book_id, book_info_dict):
    """Insert the book-info row for ``book_id``; return its row id or -1."""
    book_info_row = {
        'title': book_info_dict['title'],
        'description': book_info_dict.get('description'),
        'lang': book_info_dict['lang'],
        'source': book_info_dict['source'],
        'is_translation': 'true' == book_info_dict['isTranslation'].lower(),
        'total_chapters': book_info_dict['totalChapters'],
        'isbn': book_info_dict.get('isbn'),
        'book': book_id
    }
    book_info_id = add_book_info_row_to_table(db_cursor, queries.BOOK_INFO_INSERT_QUERY, book_info_row)
    print('Book Info Row Id :: ', book_info_id)
    return book_info_id


def _save_authors(db_cursor, queries, book_info_id, authors_list):
    """Create or update each author and map it to the book; return False on first failure."""
    for author in authors_list:
        author_row = {
            'id': -1,
            'name': author['name'].strip().lower(),
            'total_books': 1
        }
        author_row = search_author(db_cursor, queries.AUTHOR_SEARCH_QUERY, author_row)
        print('Author Search Result :: ', author_row)

        if author_row['id'] > 0:
            author_row['total_books'] = author_row['total_books'] + 1
            updated = update_author_book_count(db_cursor, queries.AUTHOR_UPDATE_QUERY, author_row)
            print('Author Update Row count :: ', updated)
            if updated <= 0:
                return False
        else:
            # Store the name stripped (original casing kept): the lookup
            # above strips it, so surrounding whitespace in the stored value
            # would prevent this author from being found again.
            author_row['name'] = author['name'].strip()
            author_row['total_books'] = 1
            new_author_id = add_author_to_table(db_cursor, queries.AUTHOR_INSERT_QUERY, author_row)
            print('Add Author Row Id :: ', new_author_id)
            if new_author_id <= 0:
                # A failed author insert fails the whole book instead of
                # being skipped and masked by the next author.
                return False
            author_row['id'] = new_author_id

        author_is_translator = False
        if 'translator' in author:
            author_is_translator = 'true' == author['translator'].lower()
        # NOTE(review): 'book' receives the book-info row id, not the book
        # row id -- confirm against the map_book_author foreign key.
        map_author_book = {
            'author': author_row['id'],
            'book': book_info_id,
            'translator': author_is_translator
        }
        mapped = add_author_book_mapping(db_cursor, queries.BOOK_AUTHOR_INSERT_QUERY, map_author_book)
        print('Author Book Mapping Row ID :: ', mapped)
        if mapped <= 0:
            return False

    return True


def _save_content(db_cursor, queries, book_id, book_dict):
    """Insert the content row, its chapters and their sentences; return False on first failure."""
    content_id = add_book_content_row_to_table(db_cursor, queries.CONTENT_INSERT_QUERY, {'book': book_id})
    print('Book Content Row Id :: ', content_id)
    if content_id <= 0:
        return False

    for chapter in book_dict['content']['chapters']:
        book_chapter_row = {
            'c_num': chapter['num'],
            'name': chapter.get('name'),
            'book_content': content_id
        }
        chapter_id = add_book_chapter_to_table(db_cursor, queries.CHAPTER_INSERT_QUERY, book_chapter_row)
        print('Book Chapter Row Id :: ', chapter_id)
        if chapter_id <= 0:
            return False

        for s_num, text in chapter['sentences'].items():
            sentence_row = {
                's_num': s_num,
                'text': text,
                'chapter': chapter_id
            }
            sen_id = add_book_sentence_to_table(db_cursor, queries.SENTENCE_INSERT_QUERY, sentence_row)
            print('Book Sentence Id :: ', sen_id)
            if sen_id <= 0:
                # Abort the whole book, not just this chapter's loop.
                return False

    return True


def _insert_and_get_id(db_cursor, insert_query, row):
    """Execute an INSERT; return the cursor's ``lastrowid``, or -1 if it is None or on error."""
    try:
        db_cursor.execute(insert_query, row)
        row_id = db_cursor.lastrowid
        return row_id if row_id is not None else -1
    except Exception as e:
        # Deliberately broad: every failure is reported as -1 so the caller
        # rolls the transaction back.
        print(str(e))
        return -1


def _execute_and_count(db_cursor, query, row):
    """Execute a statement; return the cursor's ``rowcount`` if positive, else -1."""
    try:
        db_cursor.execute(query, row)
        affected = db_cursor.rowcount
        return affected if affected > 0 else -1
    except Exception as e:
        # Deliberately broad, see _insert_and_get_id.
        print(str(e))
        return -1


def add_book_row_to_table(db_cursor, book_insert_query, book_row):
    """Insert the book row; return its row id, or -1 on failure."""
    return _insert_and_get_id(db_cursor, book_insert_query, book_row)


def add_book_info_row_to_table(db_cursor, book_info_insert_query, book_info_row):
    """Insert the book-info row; return its row id, or -1 on failure."""
    return _insert_and_get_id(db_cursor, book_info_insert_query, book_info_row)


def add_book_content_row_to_table(db_cursor, book_content_insert_query, book_content_row):
    """Insert the book-content row; return its row id, or -1 on failure."""
    return _insert_and_get_id(db_cursor, book_content_insert_query, book_content_row)


def add_book_chapter_to_table(db_cursor, book_chapter_insert_query, book_chapter_row):
    """Insert one chapter row; return its row id, or -1 on failure."""
    return _insert_and_get_id(db_cursor, book_chapter_insert_query, book_chapter_row)


def add_book_sentence_to_table(db_cursor, book_sentence_insert_query, book_sentence):
    """Insert one sentence row; return its row id, or -1 on failure."""
    return _insert_and_get_id(db_cursor, book_sentence_insert_query, book_sentence)


def add_author_to_table(db_cursor, author_insert_query, author_data):
    """Insert one author row; return its row id, or -1 on failure."""
    return _insert_and_get_id(db_cursor, author_insert_query, author_data)


def add_author_book_mapping(db_cursor, book_author_insert_query, book_author_data):
    """Insert one author/book mapping; return the affected row count, or -1 on failure."""
    return _execute_and_count(db_cursor, book_author_insert_query, book_author_data)


def search_author(db_cursor, author_search_query, author_data):
    """Look up an author and record the result in ``author_data``.

    ``author_data`` is mutated and returned. When a row is found, ``id`` is
    set from column 0 and ``total_books`` from column 2 of the result. When
    no row is found, or the query fails, ``id`` is set to -1.
    """
    try:
        db_cursor.execute(author_search_query, author_data)
        row = db_cursor.fetchone()
        if row is None:
            author_data['id'] = -1
        else:
            author_data['id'] = int(row[0])
            author_data['total_books'] = int(row[2])
        return author_data
    except Exception as e:
        # Deliberately broad: a failed lookup is reported as "not found".
        print(str(e))
        author_data['id'] = -1
        return author_data


def update_author_book_count(db_cursor, author_update_query, author_data):
    """Update an author's book count; return the affected row count, or -1 on failure."""
    return _execute_and_count(db_cursor, author_update_query, author_data)
"""Parameterized SQL statements used when adding a book (db/constants.py).

Every statement uses named ``%(key)s`` placeholders, so each is executed
with a dict whose keys match the placeholder names.
"""

BOOK_INSERT_QUERY = (
    "INSERT INTO dim_book (code, added_at) "
    "VALUES (%(code)s, %(added_at)s)"
)

AUTHOR_INSERT_QUERY = (
    "INSERT INTO dim_author (name, total_books) "
    "VALUES (%(name)s, %(total_books)s)"
)

BOOK_INFO_INSERT_QUERY = (
    "INSERT INTO dim_book_info (title, description, lang, source, is_translation, "
    "total_chapters, isbn, book) "
    "VALUES (%(title)s, %(description)s, %(lang)s, %(source)s, %(is_translation)s, "
    "%(total_chapters)s, %(isbn)s, %(book)s) "
)

BOOK_AUTHOR_INSERT_QUERY = (
    "INSERT INTO map_book_author (author, book, translator) "
    "VALUES (%(author)s, %(book)s, %(translator)s)"
)

CONTENT_INSERT_QUERY = "INSERT INTO dim_book_content (book) VALUES(%(book)s)"

CHAPTER_INSERT_QUERY = (
    "INSERT INTO dim_book_chapter (c_num, name, book_content) "
    "VALUES (%(c_num)s, %(name)s, %(book_content)s)"
)

SENTENCE_INSERT_QUERY = (
    "INSERT INTO dim_book_sentence (s_num, text, chapter) "
    "VALUES (%(s_num)s, %(text)s, %(chapter)s)"
)

# Columns are listed explicitly, in the order (id, name, total_books), rather
# than "SELECT *": the result row is then id at index 0 and total_books at
# index 2 regardless of how the table's columns are ordered or extended.
AUTHOR_SEARCH_QUERY = (
    "SELECT id, name, total_books FROM dim_author "
    "WHERE dim_author.name = %(name)s"
)

AUTHOR_UPDATE_QUERY = (
    "UPDATE dim_author SET dim_author.total_books = %(total_books)s "
    "WHERE id = %(id)s"
)
db.mysql_connection as connection -conn = connection.get_new_mysql_connection() +conn = connection.get_new_mysql_connection('../db_config.ini') print(conn.charset) print('isConnected :: ', conn.is_connected()) diff --git a/db_schema/db_schema.sql b/db_schema/db_schema.sql index 1172375..d81fc44 100644 --- a/db_schema/db_schema.sql +++ b/db_schema/db_schema.sql @@ -1,5 +1,5 @@ -- MySQL Script generated by MySQL Workbench --- Sat Jan 18 00:33:33 2020 +-- Sun Jan 19 21:04:29 2020 -- Model: New Model Version: 1.0 -- MySQL Workbench Forward Engineering @@ -21,6 +21,8 @@ USE `bitext-aligner` ; -- ----------------------------------------------------- -- Table `bitext-aligner`.`dim_author` -- ----------------------------------------------------- +DROP TABLE IF EXISTS `bitext-aligner`.`dim_author` ; + CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_author` ( `id` INT NOT NULL AUTO_INCREMENT, `name` VARCHAR(90) NOT NULL, @@ -32,8 +34,10 @@ ENGINE = InnoDB; -- ----------------------------------------------------- -- Table `bitext-aligner`.`dim_book` -- ----------------------------------------------------- +DROP TABLE IF EXISTS `bitext-aligner`.`dim_book` ; + CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book` ( - `id` INT NOT NULL, + `id` INT NOT NULL AUTO_INCREMENT, `code` VARCHAR(90) NOT NULL, `added_at` BIGINT UNSIGNED NOT NULL, PRIMARY KEY (`id`)) @@ -43,6 +47,8 @@ ENGINE = InnoDB; -- ----------------------------------------------------- -- Table `bitext-aligner`.`dim_book_info` -- ----------------------------------------------------- +DROP TABLE IF EXISTS `bitext-aligner`.`dim_book_info` ; + CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_info` ( `id` INT NOT NULL AUTO_INCREMENT, `title` VARCHAR(90) NOT NULL, @@ -71,8 +77,10 @@ CREATE UNIQUE INDEX `id_UNIQUE` ON `bitext-aligner`.`dim_book_info` (`id` ASC) V -- ----------------------------------------------------- -- Table `bitext-aligner`.`dim_book_content` -- 
"""Save every book listed in json/books.json that is not yet in the database (run.py)."""
import json
from pathlib import Path

json_file_path = Path('json/books.json')


def save_pending_books(books_json, parse_xml_file, add_book_to_db):
    """Store each pending book and record the outcome on its entry.

    Args:
        books_json: Mapping of book code to a list of book entries. Each
            entry is a dict with ``xml_file``, ``xml_file_path``,
            ``is_validated`` and ``is_saved_to_db``.
        parse_xml_file: Callable that takes an entry's ``xml_file_path`` and
            returns the parsed book dict.
        add_book_to_db: Callable ``(book_code, book_dict)`` whose return
            value is stored as the entry's new ``is_saved_to_db``.

    Returns:
        The same ``books_json`` object, updated in place.
    """
    for book_code, books_list in books_json.items():
        for book in books_list:
            # TODO :: Add not for the below check later (after doing XSD)
            # Until then the check is intentionally inverted: entries already
            # flagged as validated are the ones skipped.
            if book['is_validated']:
                print('Book : ', book['xml_file'], ' is not validated against XSD')
                continue
            if not book['is_saved_to_db']:
                print('Saving Book : ', book['xml_file'], ' in the DB')
                book_dict = parse_xml_file(book['xml_file_path'])
                book['is_saved_to_db'] = add_book_to_db(book_code, book_dict)
    return books_json


def main():
    """Load the book index, save the pending books, and write the index back."""
    # Imported here so the module can be imported without the project's XML
    # and database packages being importable.
    import xml_parser.read_xml as read_xml
    import db.add_book as adb

    # The read handle is closed by the ``with`` block before any work starts,
    # so the file is never open for reading and writing at the same time.
    with open(json_file_path, 'r') as json_file:
        json_data = json.load(json_file)

    json_data['books'] = save_pending_books(
        json_data['books'], read_xml.parse_xml_file, adb.add_book_to_db)

    with open(json_file_path, 'w') as updated_json:
        updated_json.write(json.dumps(json_data, indent=4))


if __name__ == '__main__':
    main()