You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
1.5 KiB
51 lines
1.5 KiB
from collections import OrderedDict
|
|
import os
|
|
import pandas as pd
|
|
|
|
|
|
def get_book_content():
    """Load the bundled test CSV and group its text column by chapter.

    Reads ``test_example.csv`` from the directory that contains this module.
    The file is parsed without a header row; its first three columns are
    named ``chapter``, ``sentence`` and ``text``. Only ``chapter`` and
    ``text`` are used to build the result.

    Returns:
        OrderedDict: Maps each chapter id to the list of its ``text`` values
        in file order. Chapters are keyed in order of first appearance.
    """
    csv_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'test_example.csv')
    print('Test CSV File :: ', csv_path)

    df = pd.read_csv(csv_path, header=None).rename(
        columns={0: 'chapter', 1: 'sentence', 2: 'text'})

    book_dict = OrderedDict()
    # Walk the two needed columns directly: DataFrame.iterrows() coerces each
    # row into a single-dtype Series, which can change the type of the
    # chapter id that ends up as the dictionary key.
    for ch_id, text in zip(df['chapter'], df['text']):
        book_dict.setdefault(ch_id, []).append(text)

    return book_dict
|
|
|
|
|
|
def get_book_metadata():
    """Return the hard-coded metadata record for the sample book.

    A new dictionary is built on every call, so callers may mutate the
    result without affecting later calls.

    Returns:
        dict: Keys ``book_id``, ``title``, ``lang``, ``isTranslation``,
        ``totalChapters``, ``authors`` (a list of dicts with ``name`` and an
        optional ``translator`` flag), ``description`` and ``source``.
        Flag and count values are strings (``"true"``, ``"2"``), not
        booleans or integers.
    """
    # NOTE(review): the description text has no space after
    # "Fyodor Dostoevsky." and the flags/counts are strings; both are kept
    # exactly as they were because consumers may compare against them.
    return {
        "book_id": "fdcap_book",
        "title": "Crime and Punishment",
        "lang": "en",
        "isTranslation": "true",
        "totalChapters": "2",
        "authors": [
            {
                "name": "Herr Isaac Riley",
                "translator": "true",
            },
            {
                "name": "Fyodor Dostoevsky",
            },
        ],
        "description": "Crime and Punishment (Russian: Преступление и наказание) is a novel written by Russian author "
                       "Fyodor Dostoevsky.First published in a journal named The Russian Messenger, it appeared in "
                       "twelve monthly installments in 1866, and was later published as a novel",
        "source": "https://en.wikisource.org/wiki/Crime_and_Punishment",
    }
|