% thesis.tex
% Main file of the thesis: preamble, glossary, abstract, acknowledgements and
% the document skeleton. The body text is pulled in with \include further down.

% --- Document class and encodings -------------------------------------------
% UKenglish is given as a class option; class options are global, so packages
% loaded below without their own language option (e.g. babel) also see it.
\documentclass[a4paper,UKenglish]{ifimaster}
\usepackage[utf8]{inputenc}
\usepackage[UKenglish]{duomasterforside}
% The T1 option is handed to both fontenc and url.
\usepackage[T1]{fontenc, url}

% --- General-purpose packages -------------------------------------------------
\usepackage{graphicx,babel,csquotes,textcomp,varioref,float}

% --- Bibliography (biblatex with the biber backend) ---------------------------
\usepackage[backend=biber,style=numeric-comp,sorting=nyt,maxbibnames=20,maxcitenames=20]{biblatex}

% --- Glossary, code listings, colours, layout and table helpers ---------------
\usepackage[toc]{glossaries}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{parskip}
\usepackage{fancyvrb}
\usepackage{tabularx}
\usepackage{rotating}
\usepackage{multirow}

% --- Global settings ------------------------------------------------------------
\urlstyle{sf}               % typeset URLs in the sans-serif family
\graphicspath{{images/}}    % \includegraphics also searches images/
\addbibresource{macros.bib}
\addbibresource{references.bib}

% Default appearance of every code listing in the thesis.
\lstset{%
    % Layout: wrap long lines, keep spaces, rule above and below.
    breaklines=true,
    keepspaces=true,
    frame=tb,
    % Line numbers: on the left, on every line, printed in gray.
    numbers=left,
    stepnumber=1,
    numberstyle=\color{gray},
    % Syntax colours.
    keywordstyle=\color{blue},
    stringstyle=\color{red},
    commentstyle=\color{olive}%
}

% Enable glossary output; the glossary entries are defined right after.
\makeglossaries

% The reference scoring standard.
\newglossaryentry{golden}{%
    name=golden standard,
    description={The prior scoring standard evolved from the DisGeNET and
        Dragon Database of Genes associated with Prostate Cancer}
}
% Methods described for scoring clusters: MAA, MAM, PR, PRWP and HITS.
\newglossaryentry{maa}{%
    name=MAA,
    description={The Multiple Attribute Additive method described to score
        clusters}
}

\newglossaryentry{mam}{%
    name=MAM,
    description={The Multiple Attribute Multiplicative method described to
        score clusters}
}

\newglossaryentry{pr}{%
    name=PR,
    description={The PageRank method described to score clusters}
}

\newglossaryentry{prwp}{%
    name=PRWP,
    description={The PageRank With Priors method described to score
        clusters}
}

\newglossaryentry{hits}{%
    name=HITS,
    description={The Hyperlink-Induced Topic Search method described to
        score clusters}
}
% Tooling and general terminology: Maven, IDE, MCL and R-squared.
\newglossaryentry{maven}{%
    name=Maven,
    description={The Java build tool used to compile and build clusterMaker2
        and Ranklust}
}

\newglossaryentry{ide}{%
    name=Integrated Development Environment,
    description={A development environment specialized in developing
        software products}
}

\newglossaryentry{mcl}{%
    name=MCL,
    description={Markov Cluster algorithm used in Cytoscape to cluster the
        networks}
}

\newglossaryentry{rsquared}{%
    name=R-squared,
    description={The coefficient of determination when talking about the
        grade of fitness of a linear regression}
}
% Databases and data-related terms: DISEASE, pipeline, DDPC and the Movember
% gene data.
\newglossaryentry{jensen}{%
    name=DISEASE,
    description={DISEASE database used for retrieving z-values, p-values
        and manually curated disease-gene associations}
}

\newglossaryentry{pipeline}{%
    name=pipeline,
    description={The time used from start to end when analyzing the network
        in Cytoscape}
}

\newglossaryentry{dragon}{%
    name=DDPC,
    description={Dragon Database of Genes associated with Prostate Cancer}
}

\newglossaryentry{movember}{%
    name=potential prostate cancer genes,
    description={Data retrieved from the Movember Prostate Cancer Project}
}

% Thesis metadata: title, subtitle and author.
\title{Ranklust}
\subtitle{%
    An extension of the Cytoscape clusterMaker2 plugin and its application to
    prioritize network biomarkers in prostate cancer%
}
\author{Henning Lund-Hanssen}

\begin{document}
% Front page, parameterised with the study programme and the department.
\duoforside[%
    program={Programming and Networks},
    dept={Department of Informatics},%
    long%
]

\frontmatter{}

% Paragraph layout from here on: 12pt first-line indent and 12pt between
% paragraphs.
\setlength{\parindent}{12pt}
\setlength{\parskip}{12pt}

\chapter*{Abstract}
Single agent biomarkers such as Prostate-specific antigen (PSA) in prostate
cancer are commonly used for screening, but have in a great number of cases led
to overtreatment. The main flaw with this biomarker is that mutation or
abnormality of a single gene is seldom the single cause of a disease. Rather, it
is the process of interactions between several components in a complex network.
Next-generation technologies provide bioinformaticians with data on a scale
worthy of being labeled \enquote{Big Data}. One of the advantages gained is the
potential to identify and prioritize genes that promote diseases by analyzing
biologically related networks. Another advantage is the newly gained
understanding of the topological organization of large-scale networks, which has
been achieved through clustering networks, creating subnetworks representing
functions. The goal of this assignment was to combine these two newly adapted
methods in bioinformatics to develop a plugin for Cytoscape, a network data
integration, analysis and visualization tool, in order to prioritize network
biomarkers, exemplified in prostate cancer. Through cross-validation,
benchmarking against text mined, manually knowledge curated and experimental
data, Ranklust has demonstrated this ability by using the PageRank with Priors
algorithm to rank clusters made with the Markov Cluster algorithm in the human
protein-protein interaction network (PPI). Ranklust is also demonstrated here
to characterize biomarker genes in their network context in prostate cancer,
from single gene lists manually curated by experts in the field and signature
genes of lethal prostate cancer captured from gene expression arrays. Ranklust
is merged into the future developments of clusterMaker2 and is available on
GitHub (clusterMaker2 URL: \url{https://github.com/RBVI/clusterMaker2},
ranklust-app URL: \url{https://github.com/henninglh/ranklust-app}).

%Prostate-specific antigen (PSA) is a prostate cancer biomarker used for
%screening that in a great number of cases has led to overtreatment. The main
%flaw with this biomarker is that mutation or abnormality of a single gene is
%seldom the single cause of a disease. Rather, it is the process of interactions
%between several components in a complex network. Next-generation technologies
%provide bioinformaticians with data on a scale worthy of being labeled "Big
%Data". One of the advantages gained is the potential to identify and prioritize
%genes that promote diseases by analyzing biologically related networks. Another
%advantage is the newly gained understanding of the topological organization of
%large-scale molecular networks, which has been achieved through clustering
%networks, creating subnetworks representing functions. The goal of this
%assignment was to combine these two newly adapted methods in bioinformatics to
%develop a plugin for Cytoscape, a network data integration, analysis and
%visualization tool, in order to prioritize network biomarkers for prostate
%cancer. Through cross-validation, benchmarking against text mined, manually
%knowledge curated and experimental data, Ranklust has demonstrated this ability
%by using the PageRank with Priors algorithm to rank clusters made with the
%Markov Cluster algorithm in a protein-protein interaction network (PPI). It
%ended up being merged to one of the future branches of clusterMaker2 on GitHub.

\chapter*{Acknowledgements}
I would first like to thank my two thesis supervisors: Research Fellow and
supervisor Trevor Clancy of the Institute for Cancer Research at Oslo University
Hospital, and Group Leader and co-supervisor Eivind Hovig of the Institute for
Cancer Research at Oslo University Hospital, who is also Professor II at the
Department of Informatics at the University of Oslo. You guys have always been
available when I needed advice, supported me throughout the whole process and
provided me with invaluable knowledge.

Finally, I must express my very profound gratitude to my family, for their
unfailing support and continuous encouragement. To my fellow study companions at
room 3417 Assembler \enquote{Assa}, Ole-Johan Dahls hus: over the years you guys have
taught me that procrastination is an art best performed in great company. Thank
you.

% Generated lists, in order: contents, figures, tables, code listings, and
% finally the glossary.
\tableofcontents{}
\listoffigures{}
\listoftables{}
\lstlistoflistings
\printglossaries

% Main matter: the body text is read from intro.tex, methods.tex, results.tex
% and conclusion.tex, in that order.
\mainmatter{}

\include{intro}
\include{methods}
\include{results}
\include{conclusion}
% Back matter: the bibliography.
\backmatter{}
\printbibliography
\end{document}