|
|
|
@ -12,19 +12,8 @@ data = read_csv_file(csv_file_path=train_file_path, delimiter='\t')
|
|
|
|
|
|
|
|
|
|
|
|
i = 0
|
|
|
|
i = 0
|
|
|
|
feature_dict = {}
|
|
|
|
feature_dict = {}
|
|
|
|
for inst in data:
|
|
|
|
for inst in data[:20]:
|
|
|
|
if len(inst.features) >= 0:
|
|
|
|
inst.print()
|
|
|
|
# inst.print()
|
|
|
|
|
|
|
|
i += 1
|
|
|
|
|
|
|
|
tokens = inst.text.split()
|
|
|
|
|
|
|
|
for token in tokens:
|
|
|
|
|
|
|
|
if token not in feature_dict:
|
|
|
|
|
|
|
|
feature_dict[token] = 1
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
feature_dict[token] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for key in sorted(feature_dict, key=feature_dict.get, reverse=True):
|
|
|
|
|
|
|
|
print(key, ' -> ', feature_dict.get(key))
|
|
|
|
|
|
|
|
# print('Data Points without Features :: ', i)
|
|
|
|
# print('Data Points without Features :: ', i)
|
|
|
|
|
|
|
|
|
|
|
|
# tokens = inst.text.split()
|
|
|
|
# tokens = inst.text.split()
|
|
|
|
|