|
|
|
@ -4,7 +4,7 @@ Dictionary of Lexicons used for Feature Extraction
|
|
|
|
ALL_LEXICONS = {
|
|
|
|
ALL_LEXICONS = {
|
|
|
|
|
|
|
|
|
|
|
|
'COMPARE': ['compar', 'compet', 'evaluat', 'test', 'superior', 'inferior', 'better', 'best', 'good', 'low',
|
|
|
|
'COMPARE': ['compar', 'compet', 'evaluat', 'test', 'superior', 'inferior', 'better', 'best', 'good', 'low',
|
|
|
|
'worse', 'worst', 'greater', 'larger', 'faster', 'high', 'measur', 'between', 'another', 'similar'],
|
|
|
|
'wors', 'great', 'larger', 'faster', 'high', 'measur', 'between', 'another', 'similar'],
|
|
|
|
|
|
|
|
|
|
|
|
'CONTRAST': ['contrast', 'different' 'distinct', 'conflict', 'disagree', 'oppose', 'distinguish', 'contrary'],
|
|
|
|
'CONTRAST': ['contrast', 'different' 'distinct', 'conflict', 'disagree', 'oppose', 'distinguish', 'contrary'],
|
|
|
|
|
|
|
|
|
|
|
|
@ -19,7 +19,8 @@ ALL_LEXICONS = {
|
|
|
|
'USE': ['use', 'using', 'apply', 'applied', 'employ', 'make use', 'utilize', 'implement'],
|
|
|
|
'USE': ['use', 'using', 'apply', 'applied', 'employ', 'make use', 'utilize', 'implement'],
|
|
|
|
|
|
|
|
|
|
|
|
'PRESENT': ['describe', 'discuss', 'give', 'introduce', 'note', 'notice', 'present', 'propose', 'recapitulate',
|
|
|
|
'PRESENT': ['describe', 'discuss', 'give', 'introduce', 'note', 'notice', 'present', 'propose', 'recapitulate',
|
|
|
|
'demonstrate', 'remark', 'report', 'say', 'show', 'sketch', 'state', 'suggest', 'figure'],
|
|
|
|
'demonstrate', 'remark', 'report', 'say', 'show', 'sketch', 'state', 'suggest', 'figure', 'indicate',
|
|
|
|
|
|
|
|
'specify', 'explain'],
|
|
|
|
|
|
|
|
|
|
|
|
'IMPORTANT': ['important', 'main', 'key', 'basic', 'central', 'crucial', 'critical', 'essential', 'fundamental',
|
|
|
|
'IMPORTANT': ['important', 'main', 'key', 'basic', 'central', 'crucial', 'critical', 'essential', 'fundamental',
|
|
|
|
'great', 'largest', 'major', 'overall', 'primary', 'principle', 'serious', 'substantial', 'ultimate',
|
|
|
|
'great', 'largest', 'major', 'overall', 'primary', 'principle', 'serious', 'substantial', 'ultimate',
|
|
|
|
@ -28,7 +29,8 @@ ALL_LEXICONS = {
|
|
|
|
'RESEARCH': ['research', 'paper', 'study', 'studie', 'apply', 'analyze', 'characteri', 'formali', 'investigat',
|
|
|
|
'RESEARCH': ['research', 'paper', 'study', 'studie', 'apply', 'analyze', 'characteri', 'formali', 'investigat',
|
|
|
|
'implement', 'interpret', 'examin', 'observ', 'predict', 'verify', 'work on', 'empirical', 'determin',
|
|
|
|
'implement', 'interpret', 'examin', 'observ', 'predict', 'verify', 'work on', 'empirical', 'determin',
|
|
|
|
'experiment', 'exploratory', 'ongoing', 'quantitative', 'qualitative', 'preliminary', 'statistical',
|
|
|
|
'experiment', 'exploratory', 'ongoing', 'quantitative', 'qualitative', 'preliminary', 'statistical',
|
|
|
|
'knowledge', 'underway', 'discuss', 'reference', 'publish', 'document', 'orientation'],
|
|
|
|
'knowledge', 'underway', 'discuss', 'reference', 'publish', 'document', 'orientation',
|
|
|
|
|
|
|
|
'literature', 'experience'],
|
|
|
|
|
|
|
|
|
|
|
|
'APPROACH': ['approach', 'account', 'algorithm', 'analys', 'approach', 'application', 'architecture', 'characteri',
|
|
|
|
'APPROACH': ['approach', 'account', 'algorithm', 'analys', 'approach', 'application', 'architecture', 'characteri',
|
|
|
|
'component', 'design', 'extension', 'formali', 'framework', 'implement', 'investigat', 'machine',
|
|
|
|
'component', 'design', 'extension', 'formali', 'framework', 'implement', 'investigat', 'machine',
|
|
|
|
@ -47,22 +49,25 @@ ALL_LEXICONS = {
|
|
|
|
'expert', 'investigators', 'linguists', 'philosophers', 'psycholinguists',
|
|
|
|
'expert', 'investigators', 'linguists', 'philosophers', 'psycholinguists',
|
|
|
|
'psychologists', 'researchers', 'scholars', 'semanticists', 'scientists'],
|
|
|
|
'psychologists', 'researchers', 'scholars', 'semanticists', 'scientists'],
|
|
|
|
|
|
|
|
|
|
|
|
'MEDICINE': ['medicine', 'tissue', 'gene', 'inflammatory', 'mutant', 'neuro', 'digest', 'ortho', 'kinase', 'pneumonia',
|
|
|
|
'MEDICINE': ['medicine', 'tissue', 'gene', 'inflammatory', 'mutant', 'neuro', 'digest', 'ortho', 'kinase',
|
|
|
|
'clinical', 'therap', 'kidney', 'receptor', 'cancer', 'synthesis', 'protein', 'syndrom', 'toxin', 'death', 'calcium',
|
|
|
|
'clinical', 'therap', 'kidney', 'receptor', 'cancer', 'synthesis', 'protein', 'syndrom', 'toxin', 'death',
|
|
|
|
'pharma', 'heart', 'disease', 'vitamin', 'tumor', 'blind', 'symptom', 'medical', 'vaccin', 'molecule', 'rna',
|
|
|
|
'pharma', 'heart', 'disease', 'vitamin', 'tumor', 'blind', 'symptom', 'medical', 'vaccin', 'molecule',
|
|
|
|
'biotic', 'patient', 'cells', 'immune', 'blood', 'plasma', 'diagnos', 'neura', 'reproductive', 'plasm', 'drug',
|
|
|
|
'biotic', 'patient', 'cells', 'immune', 'blood', 'plasma', 'diagnos', 'neura', 'reproductive', 'plasm', 'drug',
|
|
|
|
'membrane', 'muscle', 'contagious', 'inflam', 'physician', 'dna', 'genome', 'bacteria', 'cavity', 'injury',
|
|
|
|
'membrane', 'muscle', 'contagious', 'inflam', 'physician', 'dna', 'genome', 'bacteria', 'cavity', 'injury',
|
|
|
|
'antibodies', 'liver', 'treatment', 'pcr', 'acid', 'chronic', 'respirat', 'oxygen', 'stroke', 'antioxidant',
|
|
|
|
'antibodies', 'liver', 'treatment', 'pcr', 'acid', 'chronic', 'respirat', 'oxygen', 'stroke', 'antioxidant', 'obesity',
|
|
|
|
'metabolic', 'transmission', 'endogenous', 'syndrome', 'ultrasound', 'pathogen'],
|
|
|
|
'metabolic', 'transmission', 'endogenous', 'syndrome', 'ultrasound', 'pathogen', 'inject', 'laparoscop',
|
|
|
|
|
|
|
|
'circulat', 'ventricle', 'tract', 'pneumonia', 'calcium', 'rna', 'organism', 'biolog', 'x-ray'],
|
|
|
|
|
|
|
|
|
|
|
|
'MATH': ['matrix', 'gaussian', 'variance', 'radius', 'function', 'comput', 'once', 'twice', 'thrice', 'diagram', 'mean',
|
|
|
|
'MATH': ['matrix', 'gaussian', 'variance', 'radius', 'function', 'comput', 'once', 'twice', 'thrice', 'diagram', 'mean',
|
|
|
|
'vector', 'rectangle', 'logic', 'amount', 'maxim', 'minim', 'linear', 'magnitude', 'theorem', 'gradient', 'median',
|
|
|
|
'vector', 'rectangle', 'logic', 'amount', 'maxim', 'minim', 'linear', 'magnitude', 'theorem', 'gradient', 'median',
|
|
|
|
'exponential', 'complex', 'graph', 'mean', 'equation', 'offset', 'calculat', 'coefficient', 'discrete', 'equation',
|
|
|
|
'exponential', 'complex', 'graph', 'mean', 'equation', 'offset', 'calculat', 'coefficient', 'discrete', 'equation',
|
|
|
|
'math', 'correlation', 'outcome', 'divergence', 'differentiation', 'statistic', 'parameter', 'probabilit', 'multivariate'],
|
|
|
|
'frequen', 'math', 'correlation', 'outcome', 'divergence', 'differentiation', 'statistic', 'parameter',
|
|
|
|
|
|
|
|
'probabilit', 'multivariate', 'negative', 'positive', 'regression', 'digit'],
|
|
|
|
|
|
|
|
|
|
|
|
'COMPUTER_SCIENCE': ['database', 'software', 'evaluation', 'framework', 'computer', 'network', 'algorithm',
|
|
|
|
'COMPUTER_SCIENCE': ['database', 'software', 'evaluation', 'framework', 'computer', 'network',
|
|
|
|
'dataset','data sets', 'technology', 'kernel', 'metrics', 'nlp', 'xml', 'corpus', 'uml', 'system',
|
|
|
|
'algorithm', 'dataset','data sets', 'technology', 'kernel', 'metrics', 'nlp', 'xml',
|
|
|
|
'security', 'protocol'],
|
|
|
|
'corpus', 'uml', 'system', 'security', 'protocol', 'classification', 'data transform',
|
|
|
|
|
|
|
|
'memory', 'java', 'python', 'cluster', 'epoch', 'training', 'deadlock', 'technique'],
|
|
|
|
|
|
|
|
|
|
|
|
'CITATION': ['et al'], # TODO (for Isaac) :: Write a complex regex for finding Citations in the text
|
|
|
|
'CITATION': ['et al'], # TODO (for Isaac) :: Write a complex regex for finding Citations in the text
|
|
|
|
|
|
|
|
|
|
|
|
|