% Beamer slides for the talk "Citation Intent Classification".
% Document class: beamer.
% The class options below are handed on to packages that beamer loads itself:
%   xcolor   -- svgnames supplies DarkRed and DarkBlue; x11names supplies
%               DeepPink4, which svgnames alone does not define.
%   hyperref -- coloured link text (colorlinks), one colour per link type.
\documentclass[
  xcolor={svgnames,x11names},
  hyperref={colorlinks,citecolor=DeepPink4,linkcolor=DarkRed,urlcolor=DarkBlue}
]{beamer}

% Custom presentation theme (not a stock beamer theme; the theme file named
% "pas" has to be available to TeX).
\usetheme{pas}

% Packages
\usepackage[utf8]{inputenc} % the source is read as UTF-8
\usepackage[T1]{fontenc}    % T1 output font encoding
\usepackage{minted}         % highlighted code listings (used on the code slides);
                            % NOTE(review): minted normally needs Pygments and
                            % shell escape at compile time -- confirm build setup

% General information used by \titlepage.
% The bracketed arguments are the optional short forms.
\title[]% short title deliberately left empty
  {Citation Intent Classification}
\subtitle{Identifying the Intent of a Citation in scientific papers}

% The short author form previously read "tmip, hieutt", which does not match
% the authors named below; it now abbreviates the actual authors.
\author[Riley, Mandava]%
  {Isaac Riley and Pavan Mandava}

% NOTE(review): the short institute text looks like a template placeholder,
% and \inst{1}/\inst{2} have no matching \inst marks in \author -- confirm.
% The dangling "\\" after each entry was dropped; nothing followed it, so it
% only produced an empty line.
\institute[Universities Here and There]%
{
  \inst{1}%
  Computational Linguistics, M.Sc.
  \and
  \inst{2}%
  Computational Linguistics, M.Sc.
}

\date[]% short date deliberately left empty
  {May 20, 2020}
\subject{Computational Linguistics}

% Presentation content starts here.
\begin{document}

%%%% Slide 1 -- INTRO: title page built from \title, \author, \institute, \date
\begin{frame}
  \titlepage
\end{frame}

%%%% Slide 2 -- TASK DESCRIPTION
% States the task, lists the three intent classes, and frames it as classification.
\begin{frame}
  \frametitle{Task Description}

  \begin{itemize}

    \item Identifying intent of a citation in scientific papers
    \bigskip
    \item Three Intent categories/classes from the data set
    \begin{enumerate}
      \item background (background information)
      \item method (use of methods/tools)
      \item result (comparing results)
    \end{enumerate}
    \bigskip
    % \textbf replaces the obsolete {\bf ...} font switch.
    \item \textbf{Classification Task}
    \begin{itemize}
      \item Assign a discrete class (intent) for each data point
    \end{itemize}
  \end{itemize}

\end{frame}

%%%% Slide 3 -- DATA SET
% Per-class sizes of the training and testing splits.
% An en dash (--) separates class name and count instead of a spaced hyphen.
\begin{frame}
  \frametitle{Data set}

  \begin{itemize}

    \item Training Data: 8.2K+ data points
    \begin{enumerate}
      \item background -- 4.8K
      \item method -- 2.3K
      \item result -- 1.1K
    \end{enumerate}
    \bigskip
    \item Testing Data: 1.8K data points
    \begin{enumerate}
      \item background -- 1K
      \item method -- 0.6K
      \item result -- 0.2K
    \end{enumerate}
  \end{itemize}

\end{frame}

%%%% Slide 4 -- Approach/Architectures: classifier implementation
% The frame is [fragile] because it contains a minted (verbatim-like)
% environment. In a fragile frame, \end{frame} has to stay on a line of its
% own, so it is kept unindented.
\begin{frame}[fragile]
  \frametitle{Approach \& Architecture}
  \framesubtitle{Classifier Implementation}

  Base Classifier: \textbf{\textcolor{red}{Perceptron}}
  \begin{itemize}
    \item Linear Classifier
    \item Binary Classifier
  \end{itemize}

  % Interface sketch only: signatures are shown without bodies. Methods are
  % indented under their class; autogobble strips the common leading
  % whitespace, so the listing may be indented along with the frame content.
  \begin{minted}[autogobble, breaklines, breakanywhere, fontfamily=helvetica, fontsize=\small]{python}
    class Perceptron:
        def __init__(self, label: str, weights: dict, theta_bias: float)
        def score(self, features: list)
        def update_weights(self, features: list, learning_rate: float, penalize: bool, reward: bool)

    class MultiClassPerceptron:
        def __init__(self, epochs: int,learning_rate: float,random_state: int)
        def fit(self, X_train: list, labels: list)
        def predict(self, X_test: list)
  \end{minted}
  \bigskip
  - \textbf{Parameters} and \textbf{Hyperparameters}

\end{frame}

%%%% Slide 5 -- Approach/Architectures: feature representation
% [fragile] because of the minted listing; \end{frame} therefore stays
% unindented on its own line.
\begin{frame}[fragile]
  \frametitle{Approach \& Architecture}
  \framesubtitle{Feature Representation}

  Lexicons and Regular Expressions ($\approx$ 30 Features)
  \bigskip

  \begin{itemize}
    \item LEXICONS
    % Excerpt of the lexicon table; the dotted line stands for omitted entries.
    \begin{minted}[autogobble, breaklines, breakanywhere, fontfamily=helvetica, fontsize=\small]{python}
      ALL_LEXICONS = {
          'INCREASE': ['increase', 'grow', 'intensify', 'build up', 'explode'],
          'USE': ['use', 'using', 'apply', 'applied', 'employ', 'make use'],
          .....
      }
    \end{minted}
    \bigskip
    \item REGEX
    % Feature names are identifiers, so they are set with \texttt. Math mode
    % would typeset each name as a product of single-letter variables.
    \begin{itemize}
      \item \texttt{ACRONYM}
      \item \texttt{CONTAINS\_URL}
      \item \texttt{ENDS\_WITH\_ETHYL}
    \end{itemize}
  \end{itemize}

\end{frame}

%%%% Slide 6 -- Evaluation
% [fragile] because of the minted listing; \end{frame} therefore stays
% unindented on its own line.
\begin{frame}[fragile]
  \frametitle{Evaluation of the Classifier}
  \framesubtitle{F1 Score}

  \bigskip

  \begin{itemize}
    \item F1 Score
    \begin{itemize}
      % F1 = 2PR / (P + R), i.e. the harmonic mean; "weighted average" was imprecise.
      \item harmonic mean of Precision and Recall
    \end{itemize}
    \bigskip
    % Signature of the scoring function; no body is shown.
    \begin{minted}[autogobble, breaklines, breakanywhere, fontfamily=helvetica]{python}
      def f1_score(y_true, y_pred, labels, average)
    \end{minted}
    \bigskip
    \item Averaging
    \begin{itemize}
      \item MACRO
      \item MICRO
      \item None
    \end{itemize}
    \bigskip
    % \textcolor scopes the colour to its argument; no space before "?".
    \item Why \textcolor{red}{MACRO} and \textcolor{red}{MICRO}?

  \end{itemize}

\end{frame}

%%%% Slide 7 -- Results
% F1 scores: micro/macro averages first, then the three per-class scores.
\begin{frame}
  \frametitle{Model Performance}
  \framesubtitle{Results}

  \begin{table}
    % Looser row spacing for this table only (local to the table environment).
    \renewcommand{\arraystretch}{1.5}%
    % The extra brace group keeps the wider column padding local to the tabular.
    {\setlength{\tabcolsep}{2em}%
    % Two columns to match the two cells in every row. The previous
    % three-column spec left an empty third column with ragged rules.
    \begin{tabular}{| l | c |} \hline
      \textbf{Averaging} & \textbf{Score} \\
      \hline \hline
      MICRO & 0.64 \\
      \hline
      MACRO & 0.57 \\
      \hline
      background & 0.72 \\
      method & 0.54 \\
      result & 0.46 \\
      \hline
    \end{tabular}}

    \caption{F1-Score Results}
  \end{table}

\end{frame}

%%%% Slide 8 -- Next Steps
% Planned improvements: richer features, stronger classifiers, collaboration.
\begin{frame}
  \frametitle{Next Steps}
  \begin{itemize}
    % En dash instead of a spaced hyphen between topic and example.
    \item Better Feature Representation -- Word Embeddings
    \begin{itemize}
      \item word2vec
      \item BERT
      \item ELMo
      \item \dots{}
    \end{itemize}

    \item Better Classifier (Non-Linear / Neural Networks)
    \begin{itemize}
      \item BiRNNs
      \item BiLSTMs
      \item CNNs
      \item \dots{}
    \end{itemize}

    \item Interaction with other groups
  \end{itemize}
\end{frame}

%%%% Slide 9 -- THANK YOU
% Closing slide: the text borrows the frame-title font and colour and is
% centred in a larger size.
\begin{frame}
  \usebeamerfont{frametitle}\usebeamercolor[fg]{frametitle}
  \centering \Large
  Thanks for listening
\end{frame}

\end{document}