parent
b0a1359401
commit
d8d3b6af11
@ -1,50 +0,0 @@
|
|||||||
from collections import OrderedDict
|
|
||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
|
|
||||||
def get_book_content():
    """Load the sample book CSV and group its text column by chapter.

    Reads ``test_example.csv`` from the directory that contains this
    module. The file is parsed without a header row and its first three
    columns are named ``chapter``, ``sentence`` and ``text``.

    Returns:
        OrderedDict: maps each chapter id to the list of ``text`` values
        for that chapter, in the order the rows appear in the CSV.
    """
    # os.path.join keeps the separator correct on every platform.
    csv_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'test_example.csv')
    print('Test CSV File :: ', csv_path)

    df = pd.read_csv(csv_path, header=None).rename(
        columns={0: 'chapter', 1: 'sentence', 2: 'text'})

    book_dict = OrderedDict()
    # Iterate the two needed columns directly: it avoids building a Series
    # per row (as iterrows() does) and keeps each column's own dtype.
    # The 'sentence' column is not used when building the result.
    for ch_id, text in zip(df['chapter'], df['text']):
        book_dict.setdefault(ch_id, []).append(text)

    return book_dict
|
|
||||||
|
|
||||||
|
|
||||||
def get_book_metadata():
    """Return the hard-coded metadata for the sample book.

    Returns:
        dict: a freshly built dictionary with the keys ``book_id``,
        ``title``, ``lang``, ``isTranslation``, ``totalChapters``,
        ``authors`` (a list of dicts), ``description`` and ``source``.
        Flag and count values are stored as strings, not bools or ints.
    """
    return {
        "book_id": "fdcap_book",
        "title": "Crime and Punishment",
        "lang": "en",
        "isTranslation": "true",
        "totalChapters": "2",
        "authors": [
            {
                "name": "Herr Isaac Riley",
                "translator": "true",
            },
            {
                "name": "Fyodor Dostoevsky",
            },
        ],
        # Adjacent string literals are concatenated into one description.
        "description": "Crime and Punishment (Russian: Преступление и наказание) is a novel written by Russian author "
                       "Fyodor Dostoevsky.First published in a journal named The Russian Messenger, it appeared in "
                       "twelve monthly installments in 1866, and was later published as a novel",
        "source": "https://en.wikisource.org/wiki/Crime_and_Punishment",
    }
|
|
||||||
@ -0,0 +1,77 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Parallel Corpus Creation</title>
|
||||||
|
<style>
|
||||||
|
/* Renders anchors with class "button" as filled, button-like links. */
a.button {
    -webkit-appearance: button;
    -moz-appearance: button;
    appearance: button;
    text-decoration: none;
    background-color: #ECB142;
    border-radius: 2px;
    border: 1px solid #ECD9CF;
    /* Single colour declaration: an earlier duplicate was always overridden. */
    color: white;
    padding: 4px 4px;
    text-align: center;
    font-size: 16px;
    font-weight: bold;
    cursor: pointer;
}
|
||||||
|
a.button:hover {
|
||||||
|
background-color: #E87131;
|
||||||
|
}
|
||||||
|
table tr td {
|
||||||
|
font-size: 17px;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h2 align="center">Main Heading comes here.....</h2>
|
||||||
|
<br />
|
||||||
|
<br />
|
||||||
|
|
||||||
|
<div id="main" align="center">
|
||||||
|
|
||||||
|
<table width="auto" cellspacing="20" cellpadding="4">
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="auto" align="left">Crime And Punishment(EN) - Verbrechen und Strafe(DE)</td>
|
||||||
|
<td align="center"><a class="button" href="xslt/action.html">View HTML</a></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="auto" align="left">Crime And Punishment(EN) - Преступление и наказание(RU)</td>
|
||||||
|
<td align="center"><a class="button" href="xslt/action.html">View HTML</a></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="auto" align="left">The Gambler(EN) - Der Spieler(DE)</td>
|
||||||
|
<td align="center"><a class="button" href="xslt/action.html">View HTML</a></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="auto" align="left">The Gambler(EN) - Игрок(RU)</td>
|
||||||
|
<td align="center"><a class="button" href="xslt/action.html">View HTML</a></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="auto" align="left">Notes from Underground(EN) - Aufzeichnungen aus dem Kellerloch(DE)</td>
|
||||||
|
<td align="center"><a class="button" href="xslt/dost_under_ende.html">View HTML</a></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td width="auto" align="left">Notes from Underground(EN) - Записки из подполья(RU)</td>
|
||||||
|
<td align="center"><a class="button" href="xslt/action.html">View HTML</a></td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
</table>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"books": {
|
||||||
|
"dost_under_ende": [
|
||||||
|
{
|
||||||
|
"xml_file": "dost_under_ende_en.xml",
|
||||||
|
"lang": "en",
|
||||||
|
"xml_file_path": "/Users/pavanmandava/PythonWorkspace/bitext-aligner/xml_files/dost_under_ende_en.xml",
|
||||||
|
"is_validated": true,
|
||||||
|
"is_saved_to_db": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"xml_file": "dost_under_ende_de.xml",
|
||||||
|
"lang": "de",
|
||||||
|
"xml_file_path": "/Users/pavanmandava/PythonWorkspace/bitext-aligner/xml_files/dost_under_ende_de.xml",
|
||||||
|
"is_validated": true,
|
||||||
|
"is_saved_to_db": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue