diff --git a/db_schema/db_schema.pdf b/db_schema/db_schema.pdf
new file mode 100644
index 0000000..c158807
Binary files /dev/null and b/db_schema/db_schema.pdf differ
diff --git a/db_schema/db_schema.png b/db_schema/db_schema.png
new file mode 100644
index 0000000..b6befc6
Binary files /dev/null and b/db_schema/db_schema.png differ
diff --git a/db_schema/db_schema.sql b/db_schema/db_schema.sql
new file mode 100644
index 0000000..78ef29b
--- /dev/null
+++ b/db_schema/db_schema.sql
@@ -0,0 +1,163 @@
+-- MySQL Script generated by MySQL Workbench
+-- Thu Jan 16 23:41:59 2020
+-- Model: New Model Version: 1.0
+-- MySQL Workbench Forward Engineering
+--
+-- Hand-edited after generation: the schema default character set is utf8mb4
+-- and indexes that duplicated a PRIMARY KEY or UNIQUE index were removed.
+
+SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
+SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
+SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';
+
+-- -----------------------------------------------------
+-- Schema bitext-aligner
+-- -----------------------------------------------------
+DROP SCHEMA IF EXISTS `bitext-aligner` ;
+
+-- -----------------------------------------------------
+-- Schema bitext-aligner
+--
+-- utf8mb4 rather than utf8: in MySQL `utf8` is an alias for utf8mb3, which
+-- cannot store characters outside the Basic Multilingual Plane.
+-- -----------------------------------------------------
+CREATE SCHEMA IF NOT EXISTS `bitext-aligner` DEFAULT CHARACTER SET utf8mb4 ;
+USE `bitext-aligner` ;
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`dim_author`
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_author` (
+  `id` INT NOT NULL AUTO_INCREMENT,
+  `name` VARCHAR(90) NOT NULL,
+  `total_books` INT UNSIGNED NOT NULL,
+  PRIMARY KEY (`id`))
+ENGINE = InnoDB;
+
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`dim_book`
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book` (
+  `id` INT NOT NULL,
+  `code` VARCHAR(90) NOT NULL,
+  PRIMARY KEY (`id`))
+ENGINE = InnoDB;
+
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`dim_book_info`
+--
+-- At most one info row per book: `book` carries the UNIQUE index `book_UNIQUE`,
+-- which also serves as the index required by `info_book_fk`.
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_info` (
+  `id` INT NOT NULL AUTO_INCREMENT,
+  `title` VARCHAR(90) NOT NULL,
+  `description` VARCHAR(200) NULL,
+  `lang` VARCHAR(5) NOT NULL,
+  `source` VARCHAR(90) NOT NULL,
+  `is_translation` TINYINT NOT NULL,
+  `total_chapters` INT UNSIGNED NOT NULL,
+  `isbn` VARCHAR(80) NULL,
+  `book` INT NOT NULL,
+  PRIMARY KEY (`id`),
+  CONSTRAINT `info_book_fk`
+    FOREIGN KEY (`book`)
+    REFERENCES `bitext-aligner`.`dim_book` (`id`)
+    ON DELETE NO ACTION
+    ON UPDATE NO ACTION)
+ENGINE = InnoDB;
+
+CREATE UNIQUE INDEX `book_UNIQUE` ON `bitext-aligner`.`dim_book_info` (`book` ASC) VISIBLE;
+
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`dim_book_content`
+--
+-- At most one content row per book (`book_UNIQUE`); rows are removed together
+-- with their book (ON DELETE CASCADE).
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_content` (
+  `id` INT NOT NULL,
+  `book` INT NOT NULL,
+  PRIMARY KEY (`id`),
+  CONSTRAINT `content_book_fk`
+    FOREIGN KEY (`book`)
+    REFERENCES `bitext-aligner`.`dim_book` (`id`)
+    ON DELETE CASCADE
+    ON UPDATE CASCADE)
+ENGINE = InnoDB;
+
+CREATE UNIQUE INDEX `book_UNIQUE` ON `bitext-aligner`.`dim_book_content` (`book` ASC) VISIBLE;
+
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`dim_book_chapter`
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_chapter` (
+  `id` INT NOT NULL AUTO_INCREMENT,
+  `c_num` INT UNSIGNED NOT NULL,
+  `name` VARCHAR(90) NULL,
+  `book_content` INT NOT NULL,
+  PRIMARY KEY (`id`),
+  CONSTRAINT `ch_content_fk`
+    FOREIGN KEY (`book_content`)
+    REFERENCES `bitext-aligner`.`dim_book_content` (`id`)
+    ON DELETE CASCADE
+    ON UPDATE CASCADE)
+ENGINE = InnoDB;
+
+CREATE INDEX `content_fk_idx` ON `bitext-aligner`.`dim_book_chapter` (`book_content` ASC) VISIBLE;
+
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`dim_book_sentence`
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`dim_book_sentence` (
+  `id` INT NOT NULL AUTO_INCREMENT,
+  `s_num` INT UNSIGNED NOT NULL,
+  `text` VARCHAR(500) NOT NULL,
+  `chapter` INT NOT NULL,
+  PRIMARY KEY (`id`),
+  CONSTRAINT `sen_chapter_fk`
+    FOREIGN KEY (`chapter`)
+    REFERENCES `bitext-aligner`.`dim_book_chapter` (`id`)
+    ON DELETE CASCADE
+    ON UPDATE CASCADE)
+ENGINE = InnoDB;
+
+CREATE INDEX `chapter_fk_idx` ON `bitext-aligner`.`dim_book_sentence` (`chapter` ASC) VISIBLE;
+
+
+-- -----------------------------------------------------
+-- Table `bitext-aligner`.`map_book_author`
+--
+-- Links authors to book-info rows; `book` references `dim_book_info`.`id`.
+-- NOTE(review): there is no PRIMARY KEY or UNIQUE constraint here, so the same
+-- (author, book) pair can be inserted more than once -- confirm this is intended.
+-- -----------------------------------------------------
+CREATE TABLE IF NOT EXISTS `bitext-aligner`.`map_book_author` (
+  `author` INT NOT NULL,
+  `book` INT NOT NULL,
+  `translator` TINYINT NOT NULL,
+  CONSTRAINT `map_book_fk`
+    FOREIGN KEY (`book`)
+    REFERENCES `bitext-aligner`.`dim_book_info` (`id`)
+    ON DELETE CASCADE
+    ON UPDATE CASCADE,
+  CONSTRAINT `map_author_fk`
+    FOREIGN KEY (`author`)
+    REFERENCES `bitext-aligner`.`dim_author` (`id`)
+    ON DELETE CASCADE
+    ON UPDATE CASCADE)
+ENGINE = InnoDB;
+
+CREATE INDEX `book_fk_idx` ON `bitext-aligner`.`map_book_author` (`book` ASC) VISIBLE;
+
+CREATE INDEX `author_fk_idx` ON `bitext-aligner`.`map_book_author` (`author` ASC) VISIBLE;
+
+
+SET SQL_MODE=@OLD_SQL_MODE;
+SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
+SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
diff --git a/xml_files/book_structure.xml b/xml_files/book_structure.xml index
5dbade6..18d93d4 100644 --- a/xml_files/book_structure.xml +++ b/xml_files/book_structure.xml @@ -1,8 +1,8 @@ - + Crime and Punishment - + Crime and Punishment (Russian: Преступление и наказание) is a novel written by Russian author Fyodor Dostoevsky. First published in a journal named The Russian Messenger, it appeared in twelve monthly installments in 1866, and was later published as a novel. @@ -12,15 +12,15 @@ https://en.wikisource.org/wiki/Crime_and_Punishment true 2 - n.a. + n.a. - + First Sentence Second Sentence Third Sentence - + First Sentence Second Sentence Third Sentence diff --git a/xml_parser/create_xml.py b/xml_parser/create_xml.py index 16ce2a7..4109a5d 100644 --- a/xml_parser/create_xml.py +++ b/xml_parser/create_xml.py @@ -7,7 +7,7 @@ from pathlib import Path def create_xml_file(book_dict, book_metadata): book_root = ET.Element('book') - book_root.set('id', book_metadata['book_id']) + book_root.set('code', book_metadata['book_id']) book_info = ET.SubElement(book_root, 'bookInfo') content = ET.SubElement(book_root, 'content') @@ -45,7 +45,7 @@ def create_xml_file(book_dict, book_metadata): for key in book_dict.keys(): chapter = ET.SubElement(content, 'chapter') - chapter.set('id', str(key)) + chapter.set('num', str(key)) for idx, val in enumerate(book_dict[key]): sentence = ET.SubElement(chapter, 'sentence') sentence.set('id', str(idx + 1))