%%
%% This is file `sample-authordraft.tex',
%% generated with the docstrip utility.
%%
%% The original source files were:
%%
%% samples.dtx (with options: `authordraft')
%%
%% IMPORTANT NOTICE:
%%
%% For the copyright see the source file.
%%
%% Any modified versions of this file must be renamed
%% with new filenames distinct from sample-authordraft.tex.
%%
%% For distribution of the original source see the terms
%% for copying and modification in the file samples.dtx.
%%
%% This generated file may be distributed as long as the
%% original source files, as listed above, are part of the
%% same distribution. (The sources need not necessarily be
%% in the same archive or directory.)
%%
%% Commands for TeXCount
%TC:macro \cite [option:text,text]
%TC:macro \citep [option:text,text]
%TC:macro \citet [option:text,text]
%TC:envir table 0 1
%TC:envir table* 0 1
%TC:envir tabular [ignore] word
%TC:envir displaymath 0 word
%TC:envir math 0 word
%TC:envir comment 0 0
%%
%%
%% The first command in your LaTeX source must be the \documentclass command.
%\documentclass[sigconf,authordraft]{acmart}
% DSBDA Setting
\documentclass[sigconf, nonacm]{acmart}
% Use \documentclass[sigconf, nonacm, anonymous]{acmart} % to compile an anonymized version
\geometry{a4paper}
\settopmatter{printacmref=false,printfolios=true} % will remove the copyright box, and show the page numbers
\usepackage{dsbda-style}
%\debugmode
%\usepackage{todonotes}
%\newcommand{\todoyellow}[1]{\todo[color=yellow,inline]{#1}}
%% NOTE that a single column version may be required for
%% submission and peer review. This can be done by changing
%% the \documentclass[...]{acmart} in this template to
%% \documentclass[manuscript,screen]{acmart}
%%
%% To ensure 100% compatibility, please check the white list of
%% approved LaTeX packages to be used with the Master Article Template at
%% https://www.acm.org/publications/taps/whitelist-of-latex-packages
%% before creating your document. The white list page provides
%% information on how to submit additional LaTeX packages for
%% review and adoption.
%% Fonts used in the template cannot be substituted; margin
%% adjustments are not allowed.
%%
%% \BibTeX command to typeset BibTeX logo in the docs
\AtBeginDocument{%
\providecommand\BibTeX{{%
\normalfont B\kern-0.5em{\scshape i\kern-0.25em b}\kern-0.8em\TeX}}}
%% Rights management information. This information is sent to you
%% when you complete the rights form. These commands have SAMPLE
%% values in them; it is your responsibility as an author to replace
%% the commands and values with those provided to you when you
%% complete the rights form.
\setcopyright{acmcopyright}
\copyrightyear{2018}
\acmYear{2018}
\acmDOI{XXXXXXX.XXXXXXX}
%% These commands are for a PROCEEDINGS abstract or paper.
\acmConference[Conference acronym 'XX]{Make sure to enter the correct
conference title from your rights confirmation email}{June 03--05,
2018}{Woodstock, NY}
%
% Uncomment \acmBooktitle if the title of the proceedings is different
% from ``Proceedings of ...''!
%
%\acmBooktitle{Woodstock '18: ACM Symposium on Neural Gaze Detection,
% June 03--05, 2018, Woodstock, NY}
\acmPrice{15.00}
\acmISBN{978-1-4503-XXXX-X/18/06}
%%
%% Submission ID.
%% Use this when submitting an article to a sponsored event. You'll
%% receive a unique submission ID from the organizers
%% of the event, and this ID should be used as the parameter to this command.
%%\acmSubmissionID{123-A56-BU3}
%%
%% For managing citations, it is recommended to use bibliography
%% files in BibTeX format.
%%
%% You can then either use BibTeX with the ACM-Reference-Format style,
%% or BibLaTeX with the acmnumeric or acmauthoryear styles, which include
%% support for advanced citation of software artefacts from the
%% biblatex-software package, also separately available on CTAN.
%%
%% Look at the sample-*-biblatex.tex files for templates showcasing
%% the biblatex styles.
%%
%%
%% For managing citations, it is recommended to use bibliography
%% files in BibTeX format.
%%
%% You can then either use BibTeX with the ACM-Reference-Format style,
%% or BibLaTeX with the acmnumeric or acmauthoryear styles, which include
%% support for advanced citation of software artefacts from the
%% biblatex-software package, also separately available on CTAN.
%%
%% Look at the sample-*-biblatex.tex files for templates showcasing
%% the biblatex styles.
%%
%%
%% The majority of ACM publications use numbered citations and
%% references. The command \citestyle{authoryear} switches to the
%% "author year" style.
%%
%% If you are preparing content for an event
%% sponsored by ACM SIGGRAPH, you must use the "author year" style of
%% citations and references.
%% Uncommenting
%% the next command will enable that style.
%%\citestyle{acmauthoryear}
% Add this line for proofreading the paper (DSBDA feature)
%\debugmode
%%
%% end of the preamble, start of the body of the document source.
\begin{document}
%%
%% The "title" command has an optional parameter,
%% allowing the author to define a "short title" to be used in page headers.
\title[Short Version of the Title Goes Here (Optional)]{DSBDA Template: The Name of the Title is Hope\\
\url{https://tinyurl.com/dsbda-template}}
%%
%% The "author" command and its associated commands are used to define
%% the authors and their affiliations.
%% Of note is the shared affiliation of the first two authors, and the
%% "authornote" and "authornotemark" commands
%% used to denote shared contribution to the research.
\author{Ansgar Scherp}
\email{ansgar.scherp@uni-ulm.de}
\orcid{0000-0002-2653-9245}
\affiliation{%
\institution{Ulm University}
\city{Ulm}
\country{Germany}
}
\author{Ben Trovato}
\authornote{Both authors contributed equally to this research.}
\email{trovato@corporation.com}
\orcid{1234-5678-9012}
\author{G.K.M. Tobin}
\authornotemark[1]
\email{webmaster@marysville-ohio.com}
\affiliation{%
\institution{Institute for Clarity in Documentation}
\streetaddress{P.O. Box 1212}
\city{Dublin}
\state{Ohio}
\country{USA}
\postcode{43017-6221}
}
\author{Lars Th{\o}rv{\"a}ld}
\affiliation{%
\institution{The Th{\o}rv{\"a}ld Group}
\streetaddress{1 Th{\o}rv{\"a}ld Circle}
\city{Hekla}
\country{Iceland}}
\email{larst@affiliation.org}
\author{Valerie B\'eranger}
\affiliation{%
\institution{Inria Paris-Rocquencourt}
\city{Rocquencourt}
\country{France}
}
\author{Aparna Patel}
\affiliation{%
\institution{Rajiv Gandhi University}
\streetaddress{Rono-Hills}
\city{Doimukh}
\state{Arunachal Pradesh}
\country{India}}
\author{Huifen Chan}
\affiliation{%
\institution{Tsinghua University}
\streetaddress{30 Shuangqing Rd}
\city{Haidian Qu}
\state{Beijing Shi}
\country{China}}
\author{Charles Palmer}
\affiliation{%
\institution{Palmer Research Laboratories}
\streetaddress{8600 Datapoint Drive}
\city{San Antonio}
\state{Texas}
\country{USA}
\postcode{78229}}
\email{cpalmer@prl.com}
\author{John Smith}
\affiliation{%
\institution{The Th{\o}rv{\"a}ld Group}
\streetaddress{1 Th{\o}rv{\"a}ld Circle}
\city{Hekla}
\country{Iceland}}
\email{jsmith@affiliation.org}
\author{Julius P. Kumquat}
\affiliation{%
\institution{The Kumquat Consortium}
\city{New York}
\country{USA}}
\email{jpkumquat@consortium.net}
%%
%% By default, the full list of authors will be used in the page
%% headers. Often, this list is too long, and will overlap
%% other information printed in the page headers. This command allows
%% the author to define a more concise list
%% of authors' names for this purpose.
\renewcommand{\shortauthors}{Trovato and Tobin, et al.}
%%
%% The abstract is a short summary of the work to be presented in the
%% article.
\begin{abstract}
This template is for papers, research-based group work reports, seminar works etc.
It is based on a common ACM style, which is both popular in the computer science research community and well maintained.
\textbf{Note on the use of ChatGPT}:
We are following the procedure of the International Conference on Machine Learning (ICML), which states: ``The Large Language Model (LLM) policy for ICML 2023 prohibits text produced entirely by LLMs (i.e., “generated”). This does not prohibit authors from using LLMs for editing or polishing author-written text.'' Source: \url{https://icml.cc/Conferences/2023/llm-policy}.
For comments and feature requests, please email Ansgar at
\href{mailto:ansgar.scherp@uni-ulm.de?subject=DSBDA-TemplateForPaper-Annotated}{ansgar.scherp@uni-ulm.de}.
\todo{For the abstract, please follow the Jennifer Widom structure.}
Submission: \textit{We pledge to make the source code and additional resources publicly available upon acceptance of the paper.
An (anonymous) preview for the reviewers can be found at:
\url{http://anonoymo.us/me}.}
Submission (if already available on arXiv): \textit{An earlier version of this paper has been published on arXiv~(add cite). % \cite{add-url}.
We release the source code upon acceptance of the paper.}
Final: \textit{The source code and additional resources are available at: \url{http://anonoymo.us/me}}
\end{abstract}
%%
%% The code below is generated by the tool at http://dl.acm.org/ccs.cfm.
%% Please copy and paste the code instead of the example below.
%%
\begin{CCSXML}
<ccs2012>
<concept>
<concept_id>10010520.10010553.10010562</concept_id>
<concept_desc>Computer systems organization~Embedded systems</concept_desc>
<concept_significance>500</concept_significance>
</concept>
<concept>
<concept_id>10010520.10010575.10010755</concept_id>
<concept_desc>Computer systems organization~Redundancy</concept_desc>
<concept_significance>300</concept_significance>
</concept>
<concept>
<concept_id>10010520.10010553.10010554</concept_id>
<concept_desc>Computer systems organization~Robotics</concept_desc>
<concept_significance>100</concept_significance>
</concept>
<concept>
<concept_id>10003033.10003083.10003095</concept_id>
<concept_desc>Networks~Network reliability</concept_desc>
<concept_significance>100</concept_significance>
</concept>
</ccs2012>
\end{CCSXML}
\ccsdesc[500]{Computer systems organization~Embedded systems}
\ccsdesc[300]{Computer systems organization~Redundancy}
\ccsdesc{Computer systems organization~Robotics}
\ccsdesc[100]{Networks~Network reliability}
%%
%% Keywords. The author(s) should pick words that accurately describe
%% the work being presented. Separate the keywords with commas.
\keywords{datasets, neural networks, gaze detection, text tagging}
\received{20 February 2007}
\received[revised]{12 March 2009}
\received[accepted]{5 June 2009}
%%
%% This command processes the author and affiliation and title
%% information and builds the first part of the formatted document.
\maketitle
\section{Introduction}
\todoyellow{For the author information. Create an ORCID and add it to your record, see example of first author.
You can obtain an ORCID here: \url{https://orcid.org/}
}
Note:
This template is based on the official ``Association for Computing Machinery (ACM) - SIG Proceedings Template'' provided on Overleaf. Documentation is provided in this project. The template is taken from Overleaf:
\url{https://www.overleaf.com/latex/templates/association-for-computing-machinery-acm-sig-proceedings-template/bmvfhcdnxfty}
\todopink{
The official URL to this Overleaf template is:
\url{https://www.overleaf.com/latex/templates/dsbda-templateforpaper-annotated/svwvwvqxfxtp}
You may also use the view link (read only):
\url{https://www.overleaf.com/read/mpmsdhfcwdfk}.
If you are looking for a template for presentations/slides, Fabian Singhofer has kindly shared his for DSBDA:
\url{https://www.overleaf.com/read/qxrdtnzrrpwc}
}
Links are ``read''-links, so one can copy them into a new project.
By default, the language is set to American English.
The concept of the teaching programme is also documented and available here:
\url{https://github.com/data-science-and-big-data-analytics/teaching-examples/blob/main/Scherp-TdL21-vortrag.pdf}
Note that there are also new writing tools that support academic writing.
For example, Grammarly: \url{https://www.grammarly.com/blog/academic-writing/}
%\subsection{Motivation}
\label{sec:introduction}
\todoyellow{Note: Yellow boxes provide background information, additional notes, recommendations, etc. and can later be removed.}
\todogreen{Apply Jennifer Widom structure, which is encoded here in the yellow boxes.}
\todoyellow{What is the motivation?}
Motivate your work.
% \subsection{Problem Statement (or: Problem Formalization)}
\todoyellow{What is the problem?}
Describe in precise terms what the problem is that you address.
This definition of the problem is used/referred to throughout the paper.
\todoyellow{Why is it a problem?}
Describe the relevancy of the problem.
\todoyellow{Why is it not yet solved?}
Describe why existing solutions are insufficient.
% \subsection{Contribution}
\todoyellow{What is our solution approach?}
Describe the method/algorithm that you propose to solve the problem.
\todoyellow{What are the results?}
Describe key results from your experiments.
Mention datasets, measures, and observations.
Reflect on the key insights by a brief discussion.
Make the reader interested in your paper.
\todoyellow{What is your contribution?}
Below, we summarize our contributions.
\todopink{Point of discussion: your contributions list.}
\begin{itemize}
\item Provide a bullet-itemized list of research questions that you address.
\item Later, each research question will then be turned into a contribution, \ie a brief answer to the question is given.
\end{itemize}
% \subsection{Organization}
The remainder of the paper is organized as follows.
%
Below, we summarize the related works.
Section~\ref{sec:methods} provides a problem statement and introduces our models/methods.
The experimental apparatus is described in Section~\ref{sec:experimentalapparatus}.
An overview of the achieved results is reported in Section~\ref{sec:results}.
Section~\ref{sec:discussion} discusses the results, before we conclude.
\begin{tcolorbox}[title=Instructions]
Follow the structure suggested above.
Write explicit paragraphs for each of the questions.
Furthermore, make sure that the introduction picks up every statement made by the abstract.
The goal of the introduction is to extend the gist provided by the abstract by giving more detail, more context, explanations, and, very important, citations to definitions, related work, and methods.
\end{tcolorbox}
\section{Related Work}
\label{sec:relatedwork}
When reading the related work, we aim to understand the method(s), datasets used, results of the experiments, and what the results mean, \ie how the authors argue about the results in the discussion.
\begin{tcolorbox}[title=Instructions]
To check the trustworthiness of results, we always perform some checks (derived from~\cite{DBLP:journals/corr/abs-2204-03954v5-textclassification}).
%
Papers, where one has to tick one of the items below, do not allow for a fair comparison with the state of the art.
Reasons include that they
\begin{itemize}
\item used different or non-standard benchmark datasets,
\item modified the datasets to use a different number of classes (\ie reducing the number of classes in the preprocessing),
\item modified the datasets to use additional information (\eg additional header metadata in the 20ng text dataset),
\item employed different train-test splits (\eg use more training samples than others),
\item used a different, smaller number of training examples (\eg run their methods only on 5\% of the training data while using a benchmark dataset),
\item do not report the train-test splits (and thus the training data used remains unclear),
\item do not report hyperparameter values (particularly the learning rate),
\item do not report an average over multiple runs of the experiments together with the standard deviation (Avg. and SD allow one to assess the influence of random factors like the initialization of model weights),
\item have not optimized or do not use optimal hyperparameter values (\eg the learning rate strongly influences the results as demonstrated at the examples of BERT and RoBERTa by~\citet{DBLP:journals/corr/abs-2204-03954v5-textclassification}),
\item do unusual preprocessing on the datasets (\eg apply preprocessing for models that do not require it like BERT, drop samples in a multi-labeling task that have $1$ label and thus modify the datasets, etc.),
%
or
\item are unclear about the measure(s) used (\eg, while writing ``we use the F-score'' most likely means the (harmonic) F1-score, it still does not detail if micro-averaging, macro-averaging, or samples-averaging F1 is reported)
%
.
\end{itemize}
\end{tcolorbox}
The rationales for not using benchmark datasets or employing other train-test splits are not always clear.
Also, the papers often do not properly report hyperparameter values or miss reporting any other of the items above.
\begin{tcolorbox}[title=As a general rule when reading related work]
Be suspicious and ask yourself: ``Can I trust their results?''
Keep in mind: A primary objective of the paper is to put their method in a good light.
\end{tcolorbox}
And an important lesson when searching for literature.
\begin{tcolorbox}[title=Lesson learned (once) again!]
If you search for literature and do not find anything, you most likely just did not search for the right keywords.
For example, if you search for research on ``(source) code segmentation'', you will be disappointed (or happy) not to find any.
But do not be a fool.
There is work: it is ``text segmentation'', a classical area in natural language processing.
You just have to think about source code being an (artificial) language that any modern tool will process in the same way as a natural language.
A good hint is also if the task is visible in the community.
For text segmentation there exists its own category on Papers with Code, see \url{https://paperswithcode.com/task/text-segmentation}.
\end{tcolorbox}
Writing hint:
%
Use~\cite{Abril07}
% or~\citep{Abril07} --- some other styles support this
or~\citet{Abril07}.
But always put a tilde (\textasciitilde) before the \symbol{92}cite.
\subsection{Area 1}
\subsection{Area 2}
\subsection{Area ...}
\subsection{Summary/Reflection}
What do we learn from the literature concerning your work?
Where are their strengths, and where are their weaknesses?
What is different in the related work compared to the proposed approach?
\section{Methods [or Models]}
\label{sec:methods}
Methods: Which methods do you apply?
\subsection{[Problem Statement/Problem Formalization]}
\label{sec:problemstatement}
(if not done as part of the introduction)
\subsection{Assumptions}
- What are the assumptions that you make?
Note: make sure there is an explicit section or subsection called ``Assumptions'' in your paper.
\subsection{Methods for Aspect 1}
\todopink{Point of Discussion: Provide a bullet-itemized list of the aspects that are considered by your research.
For each aspect, provide a description of the methods/models used and proposed (own methods).
Make sure it is consistent with the research questions/contributions described in the introduction.
\textit{Example}: Aspects are: a) clustering algorithms, b) embedding methods, c) similarity measures. Instances for a) are DBSCAN, $k$-means, etc., b) TF-IDF, BERT, etc., c) cosine similarity.}
\begin{itemize}
\item Method 1
\item Method 2
\item ...
\end{itemize}
\subsection{Methods for Aspect 2}
\subsection{Methods for Aspect 3}
\subsection{Summary}
\section{Experimental Apparatus}
\label{sec:experimentalapparatus}
Describe the experimental apparatus following the structure given below.
\todoyellow{Make sure to cover the questions provided in the EMNLP checklist, see Appendix~\ref{app:emnlp2021-checklist}.}
\subsection{Datasets}
\label{sec:datasets}
Datasets: Which datasets do you use?
Provide descriptive statistics, usually in tabular form.
\todopink{Point of Discussion: Make sure that your datasets fit the problem and research questions, respectively.
Make sure that the datasets are available.
Available means that a) you have obtained the license (if needed) and b) the datasets are actually on your disk (copied).}
\subsection{Preprocessing OR Pre-processing}
\label{sec:preprocessing}
\subsection{Procedure}
\label{sec:procedure}
\todopink{Point of Discussion: Describe which methods you use along the aspects defined in your research, on which datasets they are applied, etc. Make sure it fully reflects the experiments that you want to carry out according to your own plan defined in the research questions.}
Procedure: How do you run your experiments?
\todoyellow{Note: Preprocessing can also be part of procedure.}
\subsection{Hyperparameter Optimization}
\label{sec:hyperparameteroptimization}
\todoyellow{Note: If space is limited, this can be moved to supplementary materials}
\todopink{Point of Discussion: What are the (critical) hyperparameters that you need to consider (beyond the learning rate)?
How do you plan to optimize the hyperparameters with respect to the models and datasets?
What is the hyperparameter search space?}
\subsection{Measures OR Metrics}
\label{sec:measures}
Measure: How do you measure the results?
\todopink{Point of Discussion:
Regarding the measurements and what to measure, \ie to which level of detail, please carefully read:
John Ousterhout's article on ``\textit{Always Measure One Level Deeper}''~\cite{DBLP:journals/cacm/Ousterhout18}.}
% URL:
% https://cacm.acm.org/magazines/2018/7/229031-always-measure-one-level-deeper/fulltext
\section{Results}
\label{sec:results}
- Report your results in tabular or otherwise structured form.
- Limit to objective results, no interpretation of results
\subsection{RQ1 Results}
\label{sec:results-rq1}
\subsection{RQ2 Results}
\label{sec:results-rq2}
\subsection{... Results}
\label{sec:results-rq...}
\section{Discussion}
\label{sec:discussion}
- Now interpret and reflect on your results.
\subsection{Key Scientific Insights [Gained from the Results]}
\label{sec:keyresults}
- What is the key takeaway? Reflect on the results (what have we learned from them)?
- What are the key results of your research?
- What interesting insights could you obtain?
- Break down by research question.
\subsection{Threats to Validity}
\label{sec:threattovalidity}
- Why may your results be biased/not trustworthy? And why in fact are they trustworthy! How reliable are your analyses? Meaning, critically reflect on whether there may be errors / biases in your analyses. So: What (possible) threats exist that could have made the results unreliable, AND why are these not threats?
- Trick is to write down potential threats and explain why they don't hold true here!
- How reliable are your analyses? Meaning, critically reflect on whether there may be errors / biases in your analyses.
\subsection{Generalization}
\label{sec:generalization}
- Will the results be transferable/generalize to other datasets, tasks, models, etc?
- Can one transfer the insights/results to other datasets? ... other scenarios? ... other algorithms? Why can we assume that the results generalize?
Why?
\subsection{Future Work and Impact}
\label{sec:futurework}
What is future work?
What is the general impact of your work?
--- pick up arguments from introduction etc.
[- But also: What is the practical impact. ]
\section{Conclusion}
\label{sec:conclusion}
\todoyellow{Summarize the key results in an interesting and new way.
For example by expanding it to a general broader scope of science, economics, impact to life, etc. :-)}
Provide a brief outlook to future work (if not already described in Section~\ref{sec:futurework}).
\section*{Limitations}
- Reflect on the limitations of your work, that is, which conclusions cannot or should not be drawn from it.
See also EMNLP's \textbf{Mandatory Discussion of Limitations}.
\begin{quote}
We believe that it is also important to discuss the limitations of your work, in addition to its strengths. EMNLP 2023 requires all papers to have a clear discussion of limitations, in a dedicated section titled “Limitations”. This section will appear at the end of the paper, after the discussion/conclusions section and before the references, and will not count towards the page limit. Papers without a limitation section will be automatically rejected without review.
\end{quote}
[...]
\begin{quote}
While we are open to different types of limitations, just mentioning that a set of results have been shown for English only probably does not reflect what we expect. Mentioning that the method works mostly for languages with limited morphology, like English, is a much better alternative. In addition, limitations such as low scalability to long text, the requirement of large GPU resources, or other things that inspire crucial further investigation are welcome.
\end{quote}
\url{https://2023.emnlp.org/calls/main_conference_papers/#mandatory-discussion-of-limitations}
\section*{Author Statement}
Author statement based on CRediT (Contributor Roles Taxonomy), see: \url{https://www.elsevier.com/authors/policies-and-guidelines/credit-author-statement}
%%
%% The acknowledgments section is defined using the "acks" environment
%% (and NOT an unnumbered section). This ensures the proper
%% identification of the section in the article metadata, and the
%% consistent spelling of the heading.
\begin{acks}
This template is co-funded under the ``2LIKE - Artificial Intelligence for Individualised Learning Path and Processes'' (16DHBKI001) project by the German Federal Ministry of Education and Research (BMBF) and the Ministry of Science, Research and the Arts Baden-Württemberg within the funding line Artificial Intelligence in Higher Education.
\todoyellow{IF YOU USE THE bwHPC CLUSTER, YOU CAN ADD:
The authors acknowledge support by the state of Baden-Württemberg through bwHPC. }
\creditmasterproject{SEMESTER+YEAR}\mysupervisorrole
\creditmasterproject{2022}
\end{acks}
%%
%% The next two lines define the bibliography style to be used, and
%% the bibliography file.
\bibliographystyle{ACM-Reference-Format}
\bibliography{dsbda-references}
%%
%% If your work has an appendix, this is the place to put it.
\appendix
\section{Supplementary Materials}
\label{appendix:supplementarymaterials}
\todoyellow{Note: Backward references to the main part of the paper are OK.
But do not refer directly from the body to figures or tables in the appendix.}
\subsection{Extended Related Work}
\label{appendix:extendedrelatedwork}
\subsection{Extended Results}
\label{appendix:extendedresults}
\subsection{Hyperparameter Optimization}
\label{appendix:hyperparameteroptimization}
\subsection{Detailed Discussions}
\label{appendix:detaileddiscussion}
\subsection{...}
\section{Useful Research Resources}
\input{resources/interesting-paper}
\input{resources/surveys}
\input{resources/books}
\section{Data Science and Big Data Analytics (DSBDA) Group}
% ------
\subsection{Data Science Readings}
We are running a reading club on Data Science on Wednesdays.
\textbf{How it works:}
The idea of the reading club is to have a joint chat about recent research papers. The particular focus is on text analytics and graph analytics, and on recent deep learning methods in general.
The procedure is usually as follows:
\begin{itemize}
\item Someone proposes a paper/topic, which is disseminated well before the meeting.
\item So everyone has time to read the paper and is actually also expected to have read the paper (otherwise discussions are not so much fun!)
\item During the meeting, the proposer briefly summarizes the paper, including key strengths and weaknesses.
\item Followed by a round-robin quick feedback from everyone.
\item Discussion goes into the details ... :-)
\end{itemize}
\textbf{How to subscribe:}
Interested?
Go here to subscribe:
\url{https://imap.uni-ulm.de/lists/subscribe/data-science-readings}
This is a mailing list on which you receive current information:
\url{mailto:data-science-readings@lists.uni-ulm.de}
% ------
\subsection{Lectures, Seminars, Project Groups, and Theses}
\textbf{Lectures:}
We offer a couple of different lectures for both BSc and MSc students.
These are available for self-enrolment with all materials available for download.
Please contact us for information on which lectures will be offered in the upcoming terms.
\begin{itemize}
\item ``Graph Analytics and Deep Learning'',
Self-enrolment for slides (winter 2022/23):
\url{https://moodle.uni-ulm.de/course/view.php?id=36399}
\item ``Text Analytics and Deep Learning'',
Self-enrolment for slides (winter 2021/22):
\url{https://moodle.uni-ulm.de/course/view.php?id=26119}
\item ``Web Information Retrieval (and Deep Learning)'',
Self-enrolment for slides (summer 2021):
\url{https://moodle.uni-ulm.de/course/view.php?id=22260}
\item ``(Advanced Methods in) Data Mining and Machine Learning'',
Self-enrolment for slides (winter 2020/21):
\url{https://moodle.uni-ulm.de/course/view.php?id=16999}
There are also slides for the full 4 SWS module (same moodle course):
\url{https://moodle.uni-ulm.de/mod/folder/view.php?id=254324}
\end{itemize}
My concept for research-based teaching:
\url{https://www.uni-ulm.de/fileadmin/website_uni_ulm/zle/Tag_der_Lehre/downloads/Scherp-TdL21-vortrag.pdf}
\textbf{Seminar and Projects:}
We also regularly offer seminars on data science (BSc/MSc), as well as the module ``Project Data Science''.
For projects, please contact us.
\textbf{Theses:}
If you are interested in a BSc or MSc thesis, please contact us.
We have compiled a couple of topics here:
\url{https://docs.google.com/presentation/d/1k1aEZYX_UM8rWlojgGTV11O85Lu104e2K-CBDg-k-9A}
% ------
\subsection{Examples of Student Submissions}
This folder contains examples of submissions from the last years (in PDF).
\url{https://github.com/data-science-and-big-data-analytics/teaching-examples}
Please refer to the corresponding sub-folders for an example relevant to a practical group project submitted in the context of a lecture, MSc project, seminar (written for MSc but also suitable for BSc), and MSc thesis.
% ------
\subsection{Examples of Data Science Frameworks}
This git repository explains how to use selected data science frameworks.
\url{https://github.com/data-science-and-big-data-analytics/data-science-frameworks}
A README explains how to use it.
Furthermore, helpful tips and available infrastructure are stated (bwCloud, bwUniCluster, and Google Colab).
We have also added a slide deck explaining the frameworks a bit and how to use the cloud compute services available to you.
Slides explaining this code (with comment function available):
\url{https://docs.google.com/presentation/d/1v41r4zBfYMe7okcziThfDqt0vqsKrPPYjNDRQHZksRI}
% ------
\subsection{Examples of Peer-reviewed Publications from Student Submissions}
Some selected publications from student submissions.
Will be updated and completed shortly.
\begin{itemize}
\item MSc Thesis Fabian Singhofer [DocEng '21] (B ranked), \textbf{Best paper award!}, \url{https://arxiv.org/abs/2105.08842}
\item Project STEREO [iiWAS '21] (C ranked), \url{https://arxiv.org/abs/2103.14124}
\item Project Text Summarization [iiWAS '21] (C ranked), \url{https://arxiv.org/abs/2105.11908}
\item MSc Thesis Ishwar Venugopal [IJCNN '21] (A ranked), \url{https://arxiv.org/abs/2102.07838}
\item MSc Thesis Morten Jessen [DocEng '19] (B ranked), \textbf{Best student paper award!}, \url{https://dl.acm.org/doi/10.1145/3342558.3345396}
\item MSc Thesis Florian Mai [JCDL '18] (\textbf{A* ranked}), \url{https://arxiv.org/abs/1801.06717}
\item Project Quadflor [KCAP '17] (A ranked), \url{https://arxiv.org/abs/1705.05311}
\item MSc Thesis Gregor Große-Bölting [KCAP '15, \cite{DBLP:conf/kcap/Grosse-BoltingN15}], \textbf{Best student paper nomination!}, \url{https://dl.acm.org/doi/10.1145/2815833.2815838}
\end{itemize}
\section{EMNLP 2021 Submission Guidelines}
\label{app:emnlp2021-checklist}
From the EMNLP Submission Call,
\url{https://2021.emnlp.org/call-for-papers}
=============================
Ethics / Impact Statement
-------------------------
Tick below if your submission contains an ethics consideration / impact statement. Note that the impact statement is optional.
I/We have included an ethics / impact statement as part of our conference submission and understand that this will be taken into consideration during the review process.
\subsection{Reproducibility Checklist}
Before you submit, please make sure that the following reproducibility checklist is filled.
\paragraph{For all reported experimental results:}
\begin{itemize}
\item A clear description of the mathematical setting, algorithm, and/or model (*)
\item Submission of a zip file containing source code, with specification of all dependencies, including external libraries, or a link to such resources (while still anonymized) (*)
\item Description of computing infrastructure used (*)
\item The average runtime for each model or algorithm (e.g., training, inference, etc.), or estimated energy cost (*)
\item Number of parameters in each model (*)
\item Corresponding validation performance for each reported test result (*)
\item Explanation of evaluation metrics used, with links to code (*)
\end{itemize}
\paragraph{For all experiments with hyperparameter search:}
\begin{itemize}
\item The exact number of training and evaluation runs (*)
\item Bounds for each hyperparameter (*)
\item Hyperparameter configurations for best-performing models (*)
\item Number of hyperparameter search trials (*)
\item The method of choosing hyperparameter values (e.g., uniform sampling, manual tuning, etc.) and the criterion used to select among them (e.g., accuracy) (*)
\item Summary statistics of the results (e.g., mean, variance, error bars, etc.) (*)
\end{itemize}
\paragraph{For all datasets used:}
\begin{itemize}
\item Relevant details such as languages, and number of examples and label distributions (*)
\item Details of train/validation/test splits (*)
\item Explanation of any data that were excluded, and all pre-processing steps (*)
\item A zip file containing data or link to a downloadable version of the data (*)
\item For new data collected, a complete description of the data collection process, such as instructions to annotators and methods for quality control (*)
\end{itemize}
If the above items are not applicable or if you have any additional comments, please provide your feedback below.
Note:
This list is based on Dodge et al., 2019 and Joelle Pineau's reproducibility checklist.
Dodge et al.: \url{https://www.aclweb.org/anthology/D19-1224.pdf}
Pineau: \url{https://www.cs.mcgill.ca/~jpineau/ReproducibilityChecklist.pdf}
Further checklists for papers:
CoLLAs 2024, \url{https://lifelong-ml.cc/reproducibility}
NeurIPS 2021 Paper Checklist Guidelines, \url{https://neurips.cc/Conferences/2021/PaperInformation/PaperChecklist}
\section{Administrative and Others}
\paragraph{Structure of the proposal}
You may also use this template for writing the proposal of your thesis.
Please make sure to cover these topics.
\begin{itemize}
\item Motivation
\item Problem statement (incl. assumptions!)
\item Research questions (separated into mandatory / optional)
\item Methods (you plan to apply and/or newly develop)
\item Dataset(s) (possibly also: benchmarks)
\item Related work (few, key papers only in the proposal)
\item Schedule (how to use the 6 months of work; commonly we use 4 months for development, 2 for evaluation; writing starts on day 1)
\end{itemize}
The proposal is typically short, only a few pages (\eg 1--2 A4 pages) in this template.
\paragraph{Forms for registering a thesis at UULM}
MSc Thesis:
\url{https://www.uni-ulm.de/fileadmin/website_uni_ulm/studium/Studienorganisation/Pruefungsanmeldung/Formulare/antrag_masterarbeit_WEB.pdf}
BSc Thesis:
\url{https://www.uni-ulm.de/fileadmin/website_uni_ulm/studium/Studienorganisation/Pruefungsanmeldung/Formulare/antrag_bachelorarbeit_WEB.pdf}
Do not forget to sign the statement of originality on the paper; see the following page.
\newpage
\section*{Fun}
See also paper templates, but in other disciplines.
\url{https://tinyurl.com/paper-template}
$\rightarrow$
\url{https://drive.google.com/file/d/1IaQpS5blxHNIKEBoXh0kQPRGjAtXr6XZ/view}
and
\url{https://tinyurl.com/papertemplate}
$\rightarrow$
\url{https://www.kidzone.ws/magic/walkthrough-t.htm}
\include{declaration}
\end{document}
\endinput
%%
%% End of file `sample-authordraft.tex'.