diff --git a/testing/feature_testing.py b/testing/feature_testing.py index 3781b05..7da6ef3 100644 --- a/testing/feature_testing.py +++ b/testing/feature_testing.py @@ -12,19 +12,8 @@ data = read_csv_file(csv_file_path=train_file_path, delimiter='\t') i = 0 feature_dict = {} -for inst in data: - if len(inst.features) >= 0: - # inst.print() - i += 1 - tokens = inst.text.split() - for token in tokens: - if token not in feature_dict: - feature_dict[token] = 1 - continue - feature_dict[token] += 1 - -for key in sorted(feature_dict, key=feature_dict.get, reverse=True): - print(key, ' -> ', feature_dict.get(key)) +for inst in data[:20]: + inst.print() # print('Data Points without Features :: ', i) # tokens = inst.text.split()