JSON Read/Write Utility Added, XML Validation against XSD and other minor improvements

master
Pavan Mandava 6 years ago
parent 5ae77714d5
commit e3129186af

@ -1,4 +1,5 @@
google-cloud-translate==2.0.0 google-cloud-translate==2.0.0
google-cloud-storage==1.19.1 google-cloud-storage==1.19.1
mysql-connector-python==8.0.19 mysql-connector-python==8.0.19
pandas pandas
xmlschema

@ -1,20 +1,18 @@
import json import json
from pathlib import Path
import xml_parser.read_xml as read_xml import xml_parser.read_xml as read_xml
import db.add_book as adb import db.add_book as adb
import xml_parser.validate as validate
import utils.json_utils as json_utils
import utils.constants as const
json_file_path = Path('json/books.json')
with open(json_file_path, 'r') as json_file:
json_data = json.load(json_file)
json_file.close()
def save_validated_files_to_db():
json_data = json_utils.read_json_file(const.JSON_PATH)
books_json = json_data['books'] books_json = json_data['books']
for book_code in books_json.keys(): for book_code in books_json.keys():
books_list = books_json[book_code] books_list = books_json[book_code]
for book in books_list: for book in books_list:
# TODO :: Add not for the below check later (after doing XSD) if not book['is_validated']:
if book['is_validated']:
print('Book : ', book['xml_file'], ' is not validated against XSD') print('Book : ', book['xml_file'], ' is not validated against XSD')
continue continue
if not book['is_saved_to_db']: if not book['is_saved_to_db']:
@ -24,8 +22,11 @@ with open(json_file_path, 'r') as json_file:
book['is_saved_to_db'] = result book['is_saved_to_db'] = result
json_data['books'] = books_json json_data['books'] = books_json
json_utils.write_json_file(const.JSON_PATH, json_data)
def validate_all_xml_files():
    """Run XSD validation for the registered book XML files.

    Thin entry point: the work is done by
    ``xml_parser.validate.validate_all_xml_files`` (imported as ``validate``).
    """
    validate.validate_all_xml_files()
with open(json_file_path, 'w') as updated_json:
updated_json.write(json.dumps(json_data, indent=4))
updated_json.close()
validate_all_xml_files()

@ -0,0 +1,3 @@
# Relative path of the JSON file holding the 'books' data; it is read and
# written through utils.json_utils.
JSON_PATH = 'json/books.json'
# Relative path of the XSD schema that book XML files are validated against.
XSD_PATH = 'xml_files/book.xsd'

@ -0,0 +1,19 @@
import json
from pathlib import Path
def read_json_file(file_path):
    """Load and return the JSON document stored at ``file_path``.

    Args:
        file_path: Location of the JSON file, as a string or ``Path``.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8, so decode explicitly instead of relying on the
    # platform's default encoding. The ``with`` block closes the handle, so
    # no explicit close() call is needed.
    with open(Path(file_path), 'r', encoding='utf-8') as json_file:
        return json.load(json_file)
def write_json_file(file_path, json_data):
    """Write ``json_data`` to ``file_path`` as JSON indented by 4 spaces.

    Any existing file at ``file_path`` is overwritten.

    Args:
        file_path: Destination of the JSON file, as a string or ``Path``.
        json_data: JSON-serializable object to store.

    Raises:
        TypeError: If ``json_data`` contains a value that cannot be
            serialized; the existing file is left untouched in that case.
        OSError: If the file cannot be opened for writing.
    """
    # Serialize BEFORE opening the file: opening in 'w' mode truncates it, so
    # a serialization error raised afterwards would wipe the existing data.
    serialized = json.dumps(json_data, indent=4)
    # The ``with`` block closes the handle; no explicit close() is needed.
    with open(Path(file_path), 'w', encoding='utf-8') as updated_json:
        updated_json.write(serialized)

@ -2,7 +2,8 @@ from xml.etree import ElementTree as ET
from xml.dom import minidom from xml.dom import minidom
import os import os
import json import json
from pathlib import Path import utils.json_utils as json_utils
import utils.constants as const
def create_xml_file(book_dict, book_metadata): def create_xml_file(book_dict, book_metadata):
@ -69,13 +70,12 @@ def create_xml_file(book_dict, book_metadata):
json_obj['is_saved_to_db'] = False json_obj['is_saved_to_db'] = False
add_xml_book_data_to_json(book_code, json_obj) add_xml_book_data_to_json(book_code, json_obj)
return file_path
def add_xml_book_data_to_json(book_code, json_obj): def add_xml_book_data_to_json(book_code, json_obj):
json_file_path = Path('json/books.json')
json_file = open(json_file_path, 'r') json_data = json_utils.read_json_file(const.JSON_PATH)
json_data = json.load(json_file)
json_file.close()
books = json_data['books'] books = json_data['books']
if book_code in books.keys(): if book_code in books.keys():
@ -85,9 +85,7 @@ def add_xml_book_data_to_json(book_code, json_obj):
json_data['books'] = books json_data['books'] = books
json_file = open(json_file_path, 'w') json_utils.write_json_file(const.JSON_PATH, json_data)
json_file.write(json.dumps(json_data, indent=4))
json_file.close()
def prettify(root): def prettify(root):

@ -1,11 +1,17 @@
from csv2df import get_book_content, get_book_metadata from csv2df import get_book_content, get_book_metadata
import xml_parser.create_xml as create_xml import xml_parser.create_xml as create_xml
import xml_parser.read_xml as read_xml import xml_parser.read_xml as read_xml
import xmlschema
from pathlib import Path
import xml_parser.validate as validate
create_xml.create_xml_file(get_book_content(), get_book_metadata()) file_path = create_xml.create_xml_file(get_book_content(), get_book_metadata())
print(file_path)
validate.validate_all_xml_files()
# book_dict = read_xml.parse_xml_file('/Users/pavanmandava/PythonWorkspace/bitext-aligner/xml_files/abcdef_en.xml')
book_dict = read_xml.parse_xml_file('/Users/pavanmandava/PythonWorkspace/bitext-aligner/xml_files/abcdef_en.xml')
print(book_dict)

@ -0,0 +1,38 @@
import xmlschema
import json
from pathlib import Path
import utils.json_utils as json_utils
import utils.constants as const
def is_valid(book_schema, xml_path):
    """Report whether the XML document at ``xml_path`` passes ``book_schema``.

    Delegates to ``book_schema.is_valid`` and returns its result unchanged.
    """
    verdict = book_schema.is_valid(xml_path)
    return verdict
def get_book_schema(book_xsd_path):
    """Build the ``xmlschema.XMLSchema`` for the XSD file at ``book_xsd_path``.

    The path is made absolute and handed to ``xmlschema.XMLSchema`` as a
    string.
    """
    absolute_xsd = Path(book_xsd_path).absolute()
    return xmlschema.XMLSchema(str(absolute_xsd))
def validate_all_xml_files():
    """Validate every not-yet-validated book XML file against the book XSD.

    Reads the JSON data at ``const.JSON_PATH``, walks each list stored under
    its ``'books'`` mapping, and for every entry whose ``'is_validated'`` flag
    is falsy validates the file named by ``'xml_file_path'`` against the
    schema at ``const.XSD_PATH``. The result is stored back in
    ``'is_validated'`` and the updated data is written to ``const.JSON_PATH``.

    Entries that are already validated, or that carry no ``'xml_file_path'``,
    are reported on stdout and left unchanged.
    """
    json_data = json_utils.read_json_file(const.JSON_PATH)
    book_schema = get_book_schema(const.XSD_PATH)

    # The entries are mutated in place, so ``json_data`` already reflects the
    # updates when it is written back below.
    for books_list in json_data['books'].values():
        for book in books_list:
            if book['is_validated']:
                print('Book : ', book['xml_file'], ' is valid')
                continue

            if 'xml_file_path' not in book:
                # Report the skip instead of passing over the entry silently;
                # it stays un-validated until a path is recorded for it.
                print('Book : ', book['xml_file'], ' has no xml_file_path, skipping validation')
                continue

            result = is_valid(book_schema, book['xml_file_path'])
            print('Validating Book : ', book['xml_file'], ' -> ', result)
            book['is_validated'] = result

    json_utils.write_json_file(const.JSON_PATH, json_data)
Loading…
Cancel
Save