diff --git a/report/Claudio_Maggioni_report.pdf b/report/Claudio_Maggioni_report.pdf index a04499e6..343b055f 100644 Binary files a/report/Claudio_Maggioni_report.pdf and b/report/Claudio_Maggioni_report.pdf differ diff --git a/report/Claudio_Maggioni_report.tex b/report/Claudio_Maggioni_report.tex index ba3d7081..ffe9b32f 100644 --- a/report/Claudio_Maggioni_report.tex +++ b/report/Claudio_Maggioni_report.tex @@ -1,6 +1,4 @@ \documentclass{usiinfbachelorproject} -\title{Understanding and Comparing Unsuccessful Executions in Large Datacenters} -\author{Claudio Maggioni} \usepackage{enumitem} \usepackage{fontawesome5} \usepackage{tikz} @@ -8,19 +6,24 @@ \usepackage{parskip} \setlength{\parskip}{5pt} \setlength{\parindent}{0pt} -%\usepackage[printfigures]{figcaps} +%\usepackage[printfigures]{figcaps} % figures at the end of the file \usepackage{xcolor} \usepackage{amsmath} \usepackage{subcaption} \usepackage{booktabs} \usepackage{graphicx} +\usepackage[backend=biber, +style=numeric, +citestyle=ieee]{biblatex} +\addbibresource{references.bib} \captionsetup{labelfont={bf}} + +\title{Understanding and Comparing Unsuccessful Executions in Large Datacenters} %\subtitle{The (optional) subtitle} - +\author{Claudio Maggioni} \versiondate{\today} - \begin{committee} \advisor[Universit\`a della Svizzera Italiana, Switzerland]{Prof.}{Walter}{Binder} @@ -52,21 +55,23 @@ particular in how to schedule computations effectively, avoid wasting resources and avoid failures. In 2011 Google released a month long data trace of its own \textit{Borg} cluster -management system, containing a lot of data regarding scheduling, priority -management, and failures of a real production workload. This data was the -foundation of the 2015 Ros\'a et al.\ paper \textit{Understanding the Dark Side -of Big Data Clusters: An Analysis beyond Failures}, which in its many -conclusions highlighted the need for better cluster management highlighting the -high amount of failures found in the traces. 
+management system~\cite{google-marso-11}, containing a lot of data regarding +scheduling, priority management, and failures of a real production workload. +This data was the foundation of the 2015 Ros\'a et al.\ paper +\textit{Understanding the Dark Side of Big Data Clusters: An Analysis beyond +Failures}~\cite{vino-paper}, which in its many conclusions highlighted the need +for better cluster management, given the high number of failures found in +the traces. -In 2019 Google released an updated version of the \textit{Borg} cluster traces, +In 2019 Google released an updated version of the \textit{Borg} cluster +traces~\cite{google-marso-19}, not only containing data from a far bigger workload due to the sheer power of Moore's law, but also providing data from 8 different \textit{Borg} cells from datacenters all over the world. These new traces are therefore about 100 times larger than the old traces, weighing in terms of storage spaces approximately -8TiB (when compressed and stored in JSONL format), requiring considerable -computational power to analyze them and the implementation of special data -engineering tecniques for analysis of the data. +8TiB (when compressed and stored in JSONL format)~\cite{google-drive-marso}, +requiring considerable computational power to analyze them and the +implementation of special data engineering techniques for analysis of the data. This project aims to repeat the analysis performed in 2015 to highlight similarities and differences in workload this decade brought, and expanding the @@ -87,8 +92,8 @@ tecniques used to perform the queries and analyses on the 2019 traces. paper}\label{rosuxe0-et-al.-2015-dsn-paper}} In 2015, Dr.~Andrea Rosà, Lydia Y.
Chen, Prof.~Walter Binder published a -research paper titled ``Understanding the Dark Side of Big Data -Clusters: An Analysis beyond Failures'' performing several analysis on +research paper titled \textit{Understanding the Dark Side of Big Data +Clusters: An Analysis beyond Failures}~\cite{vino-paper} performing several analyses on Google's 2011 Borg cluster traces. The salient conclusion of that research is that lots of computation performed by Google would eventually fail, leading to large amounts of computational power being @@ -121,8 +126,10 @@ termination is nontrivial. Both tasks and jobs lifecyles are represented by several events, which are encoded and stored in the trace as rows of various tables. Among the information events provide, the field ``type'' provides information on -the execution status of the job or task. This field can have the -following values: +the execution status of the job or task. This field can have several values, +which are illustrated in Figure~\ref{fig:eventtypes}. + +\begin{figure}[h] \begin{center} \begin{tabular}{p{3cm}p{12cm}} \toprule @@ -153,6 +160,8 @@ following values: \bottomrule \end{tabular} \end{center} + \caption{Overview of job and task event types.}\label{fig:eventtypes} +\end{figure} Figure~\ref{fig:eventTypes} shows the expected transitions between event types. @@ -297,8 +306,7 @@ As stated before, table ``files'' are composed of several Gzip-compressed shards of JSONL record data. The specification for the types and constraints of each record is outlined by Google in the form of a protobuffer specification file found in the trace release -package.\footnote{\href{https://github.com/google/cluster-data/blob/master/clusterdata_trace_format_v3.proto}{Google -2019 Borg traces Protobuffer specification on Github}}. This file was used as +package~\cite{google-proto-marso}.
This file was used as the oracle specification and was a critical reference for writing the query code that checks, parses and carefully sanitizes the various JSONL records prior to actual computations. @@ -691,5 +699,7 @@ developments}\label{conclusions-and-future-work-or-possible-developments}} \textbf{TBD} +\printbibliography + \end{document} % vim: set ts=2 sw=2 et tw=80: diff --git a/report/Makefile b/report/Makefile index 5ea38b3d..74760ec6 100644 --- a/report/Makefile +++ b/report/Makefile @@ -1,6 +1,7 @@ default: mkdir -p build pdflatex -output-directory=build Claudio_Maggioni_report + biber build/Claudio_Maggioni_report.bcf pdflatex -output-directory=build Claudio_Maggioni_report pdflatex -output-directory=build Claudio_Maggioni_report mv build/Claudio_Maggioni_report.pdf ./ diff --git a/report/references.bib b/report/references.bib index 66b03c41..87e5b67b 100644 --- a/report/references.bib +++ b/report/references.bib @@ -1,8 +1,29 @@ -@book{Stru1899a, - Author = {William Strunk and E. B. White}, - Title = {The Elements of Style}, - Edition = {4th}, - ISBN = {0-205-30902-X}, - Keywords = {}, - Publisher = {Longman Publishers}, - Year = {1899}} \ No newline at end of file +@inproceedings{google-marso-11, +title = {Large-scale cluster management at {Google} with {Borg}}, +author = {Abhishek Verma and Luis Pedrosa and Madhukar R. Korupolu and David Oppenheimer and Eric Tune and John Wilkes}, +year = {2015}, +booktitle = {Proceedings of the European Conference on Computer Systems (EuroSys)}, +address = {Bordeaux, France} +} + +@inproceedings{google-marso-19, +title = {Borg: the Next Generation}, +author = {Muhammad Tirmazi and Adam Barker and Nan Deng and Md Ehtesam Haque and Zhijing Gene Qin and Steven Hand and Mor Harchol-Balter and John Wilkes}, +year = {2020}, +booktitle = {EuroSys'20}, +address = {Heraklion, Crete} +} + +@INPROCEEDINGS{vino-paper, + author={Rosà, Andrea and Chen, Lydia Y. 
and Binder, Walter}, + booktitle={2015 45th Annual IEEE/IFIP International Conference on Dependable Systems and Networks}, + title={Understanding the Dark Side of Big Data Clusters: An Analysis beyond Failures}, + year={2015}, + volume={}, + number={}, + pages={207-218}, + doi={10.1109/DSN.2015.37}} + +@misc{google-drive-marso, title={Google cluster-usage traces v3.pdf}, url={https://drive.google.com/file/d/10r6cnJ5cJ89fPWCgj7j4LtLBqYN9RiI9/view}, journal={Google Drive}, publisher={Google}, author={Wilkes, John}, year={2020}, month={Aug}} + +@misc{google-proto-marso, title={Google 2019 Borg traces protobuffer specification}, url={https://github.com/google/cluster-data/blob/master/clusterdata_trace_format_v3.proto}, journal={GitHub}, publisher={Google}, author={Deng, Nan}, year={2020}, month={Aug}}