Path Issues Fixed,

Schema VARCHAR size increased,
And more Logs Added
master
Pavan Mandava 6 years ago
parent 4217754b84
commit 59d6bcdfc3

@ -1,10 +1,12 @@
from collections import OrderedDict from collections import OrderedDict
import os
import pandas as pd import pandas as pd
def get_book_content(): def get_book_content():
df = pd.read_csv("test_example.csv", header=None).rename( csv_path = os.path.dirname(os.path.realpath(__file__)) + '/test_example.csv'
print('Test CSV File :: ', csv_path)
df = pd.read_csv(csv_path, header=None).rename(
columns={0: 'chapter', 1: 'sentence', 2: 'text'}) columns={0: 'chapter', 1: 'sentence', 2: 'text'})
book_dict = OrderedDict() book_dict = OrderedDict()
@ -25,22 +27,24 @@ def get_book_content():
def get_book_metadata(): def get_book_metadata():
dict_metadata = { dict_metadata = {
"book_id": "abcdef", "book_id": "fdcap_book",
"title": "Bullshit", "title": "Crime and Punishment",
"lang": "en", "lang": "en",
"isTranslation": "true", "isTranslation": "true",
"totalChapters": "2", "totalChapters": "2",
"authors": [ "authors": [
{ {
"name": "Herr Riley", "name": "Herr Isaac Riley",
"translator": "true" "translator": "true"
}, },
{ {
"name": "Herr Singh" "name": "Fyodor Dostoevsky"
} }
], ],
"description": "Some Random Bullshit description", "description": "Crime and Punishment (Russian: Преступление и наказание) is a novel written by Russian author "
"source": "https://www.idontcare.com" "Fyodor Dostoevsky.First published in a journal named The Russian Messenger, it appeared in "
"twelve monthly installments in 1866, and was later published as a novel",
"source": "https://en.wikisource.org/wiki/Crime_and_Punishment"
} }
return dict_metadata return dict_metadata

@ -52,7 +52,7 @@ DROP TABLE IF EXISTS `bitext-aligner`.`dim_book_info` ;
CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_info` ( CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_info` (
`id` INT NOT NULL AUTO_INCREMENT, `id` INT NOT NULL AUTO_INCREMENT,
`title` VARCHAR(90) NOT NULL, `title` VARCHAR(90) NOT NULL,
`description` VARCHAR(200) NULL, `description` VARCHAR(450) NULL,
`lang` VARCHAR(5) NOT NULL, `lang` VARCHAR(5) NOT NULL,
`source` VARCHAR(90) NOT NULL, `source` VARCHAR(90) NOT NULL,
`is_translation` TINYINT NOT NULL, `is_translation` TINYINT NOT NULL,
@ -124,7 +124,7 @@ DROP TABLE IF EXISTS `bitext-aligner`.`dim_book_sentence` ;
CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_sentence` ( CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_sentence` (
`id` INT NOT NULL AUTO_INCREMENT, `id` INT NOT NULL AUTO_INCREMENT,
`s_num` INT UNSIGNED NOT NULL, `s_num` INT UNSIGNED NOT NULL,
`text` VARCHAR(500) NOT NULL, `text` VARCHAR(900) NOT NULL,
`chapter` INT NOT NULL, `chapter` INT NOT NULL,
PRIMARY KEY (`id`), PRIMARY KEY (`id`),
CONSTRAINT `sen_chapter_fk` CONSTRAINT `sen_chapter_fk`

@ -24,7 +24,10 @@ def save_validated_files_to_db():
book_dict = read_xml.parse_xml_file(book['xml_file_path']) book_dict = read_xml.parse_xml_file(book['xml_file_path'])
result = adb.add_book_to_db(book_code, book_dict) result = adb.add_book_to_db(book_code, book_dict)
book['is_saved_to_db'] = result book['is_saved_to_db'] = result
print(const.BLUE, 'Result :: ', result, const.END, '\n') w_str = const.WARNING
if result:
w_str = const.BLUE
print(w_str, 'Result :: ', result, const.END, '\n')
json_data['books'] = books_json json_data['books'] = books_json
json_utils.write_json_file(const.JSON_PATH, json_data) json_utils.write_json_file(const.JSON_PATH, json_data)

@ -2,4 +2,5 @@
1,2,"Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt." 1,2,"Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt."
1,3,"Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem." 1,3,"Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem."
2,1,"Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur?" 2,1,"Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur?"
2,2,"Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?" 2,2,"Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?"
2,3,"Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem."
1 1 1 Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo.
2 1 2 Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt.
3 1 3 Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem.
4 2 1 Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur?
5 2 2 Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?
6 2 3 Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem.

@ -1,9 +1,9 @@
import json import json
from pathlib import Path import os
def read_json_file(file_path): def read_json_file(file_path):
json_file_path = Path(file_path) json_file_path = os.path.dirname(os.path.dirname(__file__))+'/'+file_path
with open(json_file_path, 'r') as json_file: with open(json_file_path, 'r') as json_file:
json_data = json.load(json_file) json_data = json.load(json_file)
@ -12,7 +12,7 @@ def read_json_file(file_path):
def write_json_file(file_path, json_data): def write_json_file(file_path, json_data):
json_file_path = Path(file_path) json_file_path = os.path.dirname(os.path.dirname(__file__))+'/'+file_path
with open(json_file_path, 'w') as updated_json: with open(json_file_path, 'w') as updated_json:
updated_json.write(json.dumps(json_data, indent=4)) updated_json.write(json.dumps(json_data, indent=4))

@ -2,17 +2,17 @@
<book code="abc_book"> <book code="abc_book">
<bookInfo> <bookInfo>
<title>Crime and Punishment</title> <title>Crime and Punishment</title>
<lang>en</lang>
<isTranslation>true</isTranslation>
<totalChapters>2</totalChapters>
<source>https://en.wikisource.org/wiki/Crime_and_Punishment</source>
<description> <!--Optional--> <description> <!--Optional-->
Crime and Punishment (Russian: Преступление и наказание) is a novel written by Russian author Fyodor Dostoevsky. Crime and Punishment (Russian: Преступление и наказание) is a novel written by Russian author Fyodor Dostoevsky.
First published in a journal named The Russian Messenger, it appeared in twelve monthly installments in 1866, and was later published as a novel. First published in a journal named The Russian Messenger, it appeared in twelve monthly installments in 1866, and was later published as a novel.
</description> </description>
<lang>en</lang> <isbn>n.a.</isbn> <!--Optional-->
<author>Fyodor Dostoevsky</author> <author>Fyodor Dostoevsky</author>
<author translator="true">Constance Garnett</author> <author translator="true">Constance Garnett</author>
<source>https://en.wikisource.org/wiki/Crime_and_Punishment</source>
<isTranslation>true</isTranslation>
<totalChapters>2</totalChapters>
<isbn>n.a.</isbn> <!--Optional-->
</bookInfo> </bookInfo>
<content> <content>
<chapter num="1" name="Erstes Kapitel"> <chapter num="1" name="Erstes Kapitel">

@ -1,12 +1,16 @@
from pathlib import Path from pathlib import Path
import json import json
import utils.constants as const
import os
json_file_path = Path('json/books.json')
json_path = os.path.dirname(os.path.dirname(__file__))+'/'+const.JSON_PATH
json_file_path = Path(json_path)
json_data = {'books': {}} json_data = {'books': {}}
if not json_file_path.is_file(): if not json_file_path.is_file():
json_file = open(json_file_path, 'w') json_file = open(json_file_path, 'w')
json_file.write(json.dumps(json_data, indent=4)) json_file.write(json.dumps(json_data, indent=4))
json_file.close() json_file.close()
print('JSON File Created :: '+json_file.name) print(const.BLUE, 'JSON File Created :: '+json_file.name, const.END)

@ -58,9 +58,9 @@ def create_xml_file(book_dict, book_metadata):
filename = book_root.get('code') + "_" + lang.text + ".xml" filename = book_root.get('code') + "_" + lang.text + ".xml"
file = open(output_dir + '/' + filename, 'w') file = open(output_dir + '/' + filename, 'w')
file_path = file.name file_path = file.name
print('XML File Path :: ', file_path)
file.write(prettify(book_root)) file.write(prettify(book_root))
file.close() file.close()
print(const.BLUE, 'Saved XML File Path :: ', file_path, const.END)
json_obj = {} json_obj = {}
book_code = book_root.get('code') book_code = book_root.get('code')
json_obj['xml_file'] = filename json_obj['xml_file'] = filename

@ -1,11 +1,10 @@
from csv2df import get_book_content, get_book_metadata from csv2df import get_book_content, get_book_metadata
import xml_parser.create_xml as create_xml import xml_parser.create_xml as create_xml
import xml_parser.read_xml as read_xml import xml_parser.read_xml as read_xml
import xmlschema
from pathlib import Path
import xml_parser.validate as validate import xml_parser.validate as validate
# file_path = create_xml.create_xml_file(get_book_content(), get_book_metadata())
file_path = create_xml.create_xml_file(get_book_content(), get_book_metadata())
# print(file_path) # print(file_path)

@ -1,8 +1,8 @@
import xmlschema import xmlschema
import json import json
from pathlib import Path
import utils.json_utils as json_utils import utils.json_utils as json_utils
import utils.constants as const import utils.constants as const
import os
def is_valid(book_schema, xml_path): def is_valid(book_schema, xml_path):
@ -10,8 +10,8 @@ def is_valid(book_schema, xml_path):
def get_book_schema(book_xsd_path): def get_book_schema(book_xsd_path):
xsd_path = Path(book_xsd_path) xsd_full_path = os.path.dirname(os.path.dirname(__file__))+'/'+book_xsd_path
book_schema = xmlschema.XMLSchema(str(xsd_path.absolute())) book_schema = xmlschema.XMLSchema(xsd_full_path)
return book_schema return book_schema

Loading…
Cancel
Save