\documentclass[11pt]{beamer}
%\documentclass[11pt, draft]{beamer}

\usetheme{Antibes} % or Malmoe -> more somber

%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%%                               Load Packages                               %%
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

\usepackage[utf8]{inputenc} %% use UTF-8, maybe not needed since 2018
\usepackage[english,main=italian]{babel} %% language

\usepackage{import} %% specify path for import

%% math packages
\usepackage{graphicx} %% for pictures
\usepackage{float}
\usepackage{amssymb} %% math symbols
\usepackage{amsmath} %% math matrix etc
\usepackage{listings} %% code block
\usepackage{tabularray} %% better tables
\usepackage{booktabs} %% rules for tables
\usepackage{mathrsfs}
\usepackage{mathtools}
\usepackage{algorithm} %% for algorithms
\usepackage{algpseudocode} %% loads algorithmicx
\usepackage{amsthm}
\usepackage{thmtools} %% theorems
\usepackage{nicematrix} %% better matrixes

%% plot packages
\usepackage{pgfplots} %% plots used with \begin{tikzpicture}
\usepackage{tikz} %% for pictures
\usetikzlibrary{trees, fit}
\pgfplotsset{width=10cm,compat=newest}

%% design packages
\usepackage{enumitem} %% for lists and enumerating
\usepackage{color}
\usepackage{xcolor,colortbl} % xcolor for defining colors, colortbl for table colors
\usepackage{makecell} %% for multiple lines in cell of table
\usepackage{cancel}
\usepackage{pgfornament} %% ornaments

%% load last
% \usepackage[hidelinks]{hyperref} %% links for table of contents, load last
% \usepackage{bookmark} %% for better table of contents

%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%%                       Configuration of the packages                       %%
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

%% Redefine \vec so that vectors are typeset with an overbar (\bar) instead
%% of the default arrow accent.
\renewcommand{\vec}[1]{\bar{#1}}
%% \norm{x}: wrap the argument in auto-sized single vertical bars
%% (\left\lvert ... \right\rvert). In the slides it is applied to sets,
%% e.g. \norm{A} \le k, as a cardinality constraint.
\newcommand{\norm}[1]{\left\lvert#1\right\rvert}

%% items in itemize emph+box
%% usage: \ieb{Class:} for simple item
%%        \ieb[4cm]{Class:} for specific size of box
%% #1 (optional, default 2cm): width of the box.
%% #2: label text, emphasized and left-aligned inside the box.
%% The line ends inside the definition are commented out so that they do not
%% become space tokens before and after the box.
\newcommand{\ieb}[2][2cm]{%
  \makebox[#1][l]{\emph{#2}}%
} %% TODO: replace with description environment (? maybe)

% less vertical space around align & align*
% Sets the four display skips (above/below, normal and short variants) to 0pt.
% Every line end inside the definition is commented out, so calling the macro
% in the middle of a paragraph does not insert stray spaces.
\newcommand{\zerodisplayskips}{%
  \setlength{\abovedisplayskip}{0pt}%
  \setlength{\belowdisplayskip}{0pt}%
  \setlength{\abovedisplayshortskip}{0pt}%
  \setlength{\belowdisplayshortskip}{0pt}%
}

% make dotfill use all the space available
% Dots are set in 1.00em-wide boxes repeated with \cleaders up to \hfill; the
% final \kern0pt follows the leaders as the last item of the expansion.
% The line end after the opening brace is commented out so that no space token
% precedes \leavevmode when \dotfill is used inside a line of text.
\renewcommand{\dotfill}{%
  \leavevmode\cleaders\hbox to 1.00em{\hss .\hss }\hfill\kern0pt } % chktex 1 chktex 26

% section not in table of contents
% \hiddensection{<title>}: steps the section counter by hand, then issues a
% starred \section with the given title.
% Line ends inside the definition are commented out to avoid stray spaces.
\newcommand{\hiddensection}[1]{%
  \stepcounter{section}%
  \section*{{#1}}%
}

% \hiddensubsection{<title>}: same as \hiddensection, one level down
% (steps the subsection counter, then issues a starred \subsection).
\newcommand{\hiddensubsection}[1]{%
  \stepcounter{subsection}%
  \subsection*{{#1}}%
}

% With \fboxsep = -\fboxrule the padding cancels the rule thickness, so a
% frame drawn by \fbox does not enlarge the material it surrounds.
% NOTE(review): this is a global setting and applies to every command that
% reads \fboxsep -- confirm it should stay enabled outside debugging.
\setlength{\fboxsep}{-\fboxrule} % for debugging

%% PACKAGE tabularray
%% Load the tabularray library named "amsmath".
\UseTblrLibrary{amsmath}


%% PACKAGE color
%% NOTE(review): "red" and "gray" are names that xcolor already predefines;
%% the definitions below replace them for the whole deck -- confirm intended.
\definecolor{red}{rgb}{1, 0.1, 0.1}
\definecolor{lightgreen}{rgb}{0.55, 0.87, 0.47}
\definecolor{gray}{rgb}{0.3, 0.3, 0.3}
%% \lgt expands to \cellcolor{lightgreen}; \gry expands to \textcolor{gray}
%% and therefore colors the brace group that follows it: \gry{text}.
\newcommand{\lgt}{\cellcolor{lightgreen}} %% light green in tables
\newcommand{\gry}{\textcolor{gray}} %% gray text


%% minipage-based helper
%% \thend{<width>}: a centered minipage of the given width and 1em height,
%% vertically centered, whose only content is the dotted leader \dotfill
%% (as redefined earlier in this preamble).
\newcommand{\thend}[1]{\begin{center}
  \begin{minipage}[c][1em][c]{#1}
    \dotfill{}
  \end{minipage}
\end{center}}


%% PACKAGE thmtools
%% Style "steo": bold upright head, medium-weight note, upright body.
%% Used by the unnumbered environment "teorema".
\declaretheoremstyle[
 headfont=\normalfont\bfseries,
 notefont=\mdseries,
 bodyfont=\normalfont,
]{steo}
\declaretheorem[numbered=no, style=steo]{teorema}
% \declaretheorem[thmbox=S]{teorema}

%% Style "sdef": same key values as "steo".
%% Used by the unnumbered environment "definizione".
\declaretheoremstyle[
  headfont=\normalfont\bfseries,
  notefont=\mdseries,
  bodyfont=\normalfont,
]{sdef}
\declaretheorem[numbered=no, style=sdef]{definizione}

%% Style "sprf": negative space above (-6pt), 6pt below, bold head followed
%% by a colon and 1em of space, upright body.
%% Used by the unnumbered environment "dimostrazione", titled "Dimostrazione".
\declaretheoremstyle[
  spaceabove=-6pt,
  spacebelow=6pt,
  headfont=\normalfont\bfseries,
  bodyfont=\normalfont,
  postheadspace=1em,
  headpunct={:}
]{sprf}
\declaretheorem[name={Dimostrazione}, style=sprf, numbered=no]{dimostrazione}

%% PACKAGE listings
%% Defaults for code listings: C language, small typewriter font, tiny line
%% numbers on the left, light gray background. Long lines are broken, and each
%% continuation line starts with a red hook arrow followed by a space.
\lstset{
  language=C,
  showspaces=false,
  basicstyle=\small\ttfamily,
  numbers=left,
  numberstyle=\tiny,
  breaklines=true,
  postbreak=\mbox{\textcolor{red}{$\hookrightarrow$}\space},
  backgroundcolor = \color{lightgray},
}


%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

%% Title-page metadata. The optional arguments of \title and \institute are
%% the short forms of the title and of the institute.
\title[Tesi]{ENTROPIA E MISURE SUBMODULARI DELL'INFORMAZIONE}
\author{Elvis Rossi}
\institute[DIPARTIMENTO DI INFORMATICA]{Dipartimento di Informatica \\
\medskip
Corso di Laurea Triennale in Informatica}
%% Date field: today's date, a line break, then the fixed text "TESI DI LAUREA".
\date{\today\ \\ TESI DI LAUREA}
%% Logo: figures/cherubino.eps scaled to 10% of its natural size.
\logo{\includegraphics[keepaspectratio=true,scale=0.1]{figures/cherubino.eps}}
% \subtitle{} TODO maybe add subtitle?

%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

\begin{document}

\begin{frame}
  \titlepage % chktex 1
\end{frame}

\section*{Indice}
\begin{frame}[allowframebreaks]
  \frametitle{Indice}
  \tableofcontents
\end{frame}

%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

% \section{Introduzione}
% %% - - - - - - - - - - - - - - - - - %%
% \subsection{Matroidi}

% \begin{frame}
%   \begin{definizione}    
%     Un \textsc{matroide} $M = (E, \mathscr{I})$ è formato \\
%     da un insieme finito di elementi $E$ e da da un sottoinsieme $\mathscr{I}$ dell'insieme delle parti $\mathcal{P}(E)$ tale che:

%     \begin{itemize}
%     \item[$-$] $\emptyset \in \mathscr{I}$
%     \item[$-$] per ogni sottoinsieme $A$ contenuto in $ I \in \mathscr{I} \Rightarrow A \in \mathscr{I}$
%     \item[$-$] se $I_p$ con $p$ elementi e $I_{p+1}$ con $p+1$ elementi, sottoinsiemi di $\mathscr{I}$, allora esiste un elemento $i$ di $I_{p+1} \setminus I_p$ tale che $I_p \cup \{i\} \in \mathscr{I}$
%     \end{itemize}
%   \end{definizione}
% \end{frame}

% \begin{frame}
%   Quindi se consideriamo una matrice come un insieme astratto di colonne si ottiene un matroide\\

%   \begin{equation*}
%     M =
%     \begin{bNiceMatrix}[right-margin,cell-space-limits=3pt]
%       a_{11} & a_{12} & \cdots & a_{1n}\\
%       \vdots & \ddots & & \vdots\\
%       a_{m1} & a_{m2} & \cdots & a_{mn}
%       \CodeAfter
%       \tikz \node [draw=red, rounded corners = 2pt, inner sep=0.5pt ,fit=(1-1)(3-1)] {} ;
%       \tikz \node [draw=red, rounded corners = 2pt, inner sep=0.5pt ,fit=(1-2)(3-2)] {} ;
%       \tikz \node [draw=red, rounded corners = 2pt, inner sep=0.5pt ,fit=(1-4)(3-4)] {} ;
%     \end{bNiceMatrix}
%   \end{equation*}
%   Tuttavia non tutti i matroidi possono essere espressi come matrici\\
%   L'indipendenza è quindi relativa all'appartenenza all'insieme $\mathscr{I}$, non a una relazione fra gli elementi
% \end{frame}

%% - - - - - - - - - - - - - - - - - %%
% \subsection{Politopi e Poliedri}

% \begin{frame}
%   \begin{definizione}[inviluppo convesso]
%     {\fontsize{10}{12}\[ \text{conv}(K) = \{\lambda_1 \textbf{x}_1 + \ldots + \lambda_k \textbf{x}_k : \{\textbf{x}_1,\ldots,\textbf{x}_k \}\subseteq K, \lambda_i\geq0, \sum_{i=1}^k \lambda_i = 1 \} \]} % chktex 11
%   \end{definizione}

%   \begin{definizione}[cono]
%     \[ \text{cono}(Y) = \{ \lambda_1 \textbf{y}_1 + \ldots + \lambda_k \textbf{y}_k : \{\textbf{y}_1,\ldots,\textbf{y}_k\}\subseteq Y, \lambda_i \ge 0\} \] % chktex 11
%   \end{definizione}
% \end{frame}

% \begin{frame} %%% TODO immagini V-politopo e V-poliedro
%   \begin{definizione}
%     Un $\mathcal{V}$-\textsc{politopo} è l'inviluppo convesso di un insieme finito di punti in $\mathbb{R}^d$
%   \end{definizione}

%   \begin{definizione}
%     Un $\mathcal{V}$-\textsc{poliedro} è la somma di Minkowski di una convoluzione di punti e un cono
%   \end{definizione}
% \end{frame}

% \begin{frame} %%% TODO immagini H-politopo e H-poliedro
%   \begin{definizione}
%     Un $\mathcal{H}$-\textsc{politopo} è un $\mathcal{H}$-\textsc{poliedro} ``finito'', cioè che non contiene semirette.
%   \end{definizione}

%   \begin{definizione}
%     Un $\mathcal{H}$-\textsc{poliedro} è l'intersezione di un numero finito di semispazi chiusi in $\mathbb{R}^d$\\
%     cioè data una matrice $A$ di dimensione $m \times d$ e un vettore $\textbf{z}$ di $\mathbb{R}^m$\\
%     $P(A,\textbf{z}) = \{ \textbf{x}\in\mathbb{R}^d : A \textbf{x} \le \textbf{z} \}$
%   \end{definizione}
% \end{frame}

% \begin{frame}
%   Le definizioni di $\mathcal{V}$-\textsc{politopo} e di $\mathcal{H}$-\textsc{politopo} sono equivalenti.
%   Le definizioni di $\mathcal{V}$-\textsc{poliedro} e di $\mathcal{H}$-\textsc{poliedro} sono equivalenti.
% \end{frame}

%% - - - - - - - - - - - - - - - - - %%
\section{Funzioni submodulari e Proprietà}
\begin{frame} %% TODO drawing?
  \begin{definizione}[Submodularità]
    Data $f: \mathcal{P}(S) \to \mathbb{R}$, è submodulare se:
    \begin{equation*}
      f(T) + f(U) \ge f(T \cap U) + f(T \cup U)\quad \text{ per ogni } T,U \subset S
    \end{equation*}
  \end{definizione}
  \begin{definizione}
    $f$ è supermodulare se $-f$ è submodulare
  \end{definizione}
  \begin{definizione}
    $f$ è modulare se è sia submodulare che supermodulare
  \end{definizione}
  Se $f$ è modulare allora $f(U) = \omega(U)+ \gamma$ con $\gamma$ costante e $\omega: S \to \mathbb{R}$ \\
  \begin{flushright}
    $\omega(U) = \sum_{s \in U}\omega(s)$\hspace*{1em}
  \end{flushright}
\end{frame}

% \begin{frame}
%   \begin{definizione}
%     $f$ è chiamata non decresecente se $f(T) \le f(U)$ per qualsiasi $T \subseteq U \subseteq S$
%   \end{definizione}
%     \begin{definizione}
%     $f$ è chiamata non crescente se $f(T) \ge f(U)$ per qualsiasi $T \subseteq U \subseteq S$
%   \end{definizione}
%     \begin{definizione}
%     $f$ è chiamata normalizzata se $f(\emptyset) = 0$
%   \end{definizione}
% \end{frame}

% \begin{frame}
%   Si possono definire due poliedri associati a una funzione $f$ submodulare
%   \begin{definizione}[Polimatroide associato a $f$]
%     \begin{equation*}
%       P_f = \{x \in \mathbb{R}^S: x \ge \textbf{0}, x(U) \le f(U), \forall U \subseteq S \}
%     \end{equation*}
%   \end{definizione}
%   \begin{definizione}[Polimatroide esteso associato a $f$]
%     \begin{equation*}
%       EP_f = \{x \in \mathbb{R}^S: x(U) \le f(U), \forall U \subseteq S \}
%     \end{equation*}
%   \end{definizione}
% \end{frame}

\begin{frame}
  \begin{definizione}[Derivata prima e Conditional Gain]
    \begin{equation*}
      f^{(1)}(j;V) = f(j|V) = f(\{j\} \cup V) - f(V)
    \end{equation*}
    \begin{equation*}
      f(A|B) = f(A \cup B) - f(B)
    \end{equation*}
  \end{definizione}

  \begin{definizione}[Derivata seconda]
    \begin{equation*}
      f^{(2)}(j,k;V) = f(j|V \cup \{k\}) - f(j|V)
    \end{equation*}
  \end{definizione}

  \begin{definizione}[Derivata terza]
    \begin{equation*}
      f^{(3)}(i,j,k;V) = f^{(2)}(j,k;V\cup \{i\}) - f^{(2)}(j,k;V)
    \end{equation*}
  \end{definizione}
\end{frame}

\begin{frame}
  \begin{definizione}[Submodularità]
    $f$ è submodulare se $f(j|T) \ge f(j|V)\quad\quad \forall T \subseteq V, j \notin V$
  \end{definizione}
  \begin{definizione}[Submodularità]
    $f$ è submodulare se $f^{(2)}(j,k;T)\le 0\quad\quad \forall j,k \notin T $
  \end{definizione}
\end{frame}

\begin{frame}
  \begin{definizione}
    $f$ è una funzione polimatroide se è monotona, non negativa e submodulare.
  \end{definizione}

  Quindi possono essere viste come funzioni d'informazione $\mathcal{I}_f(A) = f(A)$ dato che soddisfano tutte le disuguaglianze di Shannon:
  \begin{itemize}
  \item[$-$] $f(A)$ è normalizzata, cioè $f(\emptyset) = 0$
  \item[$-$] $f(A)$ è monotona non decrescente, cioè per $A \subseteq B \subseteq S$ allora $f(B) \ge f(A)$
  \item[$-$] $f(A)$ è submodulare, cioè $f(A) + f(B) \geq f(A \cup B) + f(A \cap B), \forall A, B \subseteq S$
  \end{itemize}

  In particolare la classe delle funzioni polimatroidi è strettamente più generale della classe delle funzioni entropiche.
\end{frame}

%% - - - - - - - - - - - - - - - - - %%
\subsection{$\mathcal{I}_f(A;B)$ e $\mathcal{I}_f(A;B|C)$}
\begin{frame}
  \begin{definizione}[Informazione Mutua]
    Data una funzione $f: \mathcal{P}(V) \to \mathbb{R}$, sia $\mathcal{I}_f(A;B) = f(A) - f(A|B)$
  \end{definizione}
  Come per l'informazione mutua dell'entropia $\mathcal{I}_f(A;B) = \mathcal{I}_f(B;A)$\\
  Inoltre l'informazione mutua fra lo stesso elemento è proprio l'informazione dell'elemento: $\mathcal{I}_f(A;A) = f(A)$

  \begin{definizione}[Conditional Mutual Information]
    Data una funzione $f: \mathcal{P}(V) \to \mathbb{R}$, sia $\mathcal{I}_f(A;B|C) = f(A|C) + f(B|C) - f(A \cup B|C)$
  \end{definizione}
\end{frame}

\begin{frame}
  \begin{teorema}
    Se $B$ costante, $\mathcal{I}_f(A;B)$ è submodulare in $A$
    \begin{center}
      se e solo se
    \end{center}
    $f^{(2)}(j,k;A)$ è monotona non decrescente in $A \subseteq V \setminus \{j,k\}$
  \end{teorema}

  \begin{teorema}
    Se $B$ costante, $\mathcal{I}_f(A;B)$ è submodulare in $A$
    \begin{center}
      se e solo se
    \end{center}
    $f^{(3)}(i,j,k;A) \ge 0$
  \end{teorema}
\end{frame}


%% - - - - - - - - - - - - - - - - - %%
% \subsection{$\perp_f$}
% \begin{frame}
%   Date tutte le definizioni precedenti si può adesso definire quando due insiemi sono indipendenti rispetto alla funzione di informazione submodulare\\
%   Tuttavia il modo di definire questa indipendenza non è unico per le funzioni submodulari\\
%   \begin{definizione}[Joint Independence]
%     $A\perp_f^{\text{J}}B$ se $\mathcal{I}_f(A;B) = 0$
%   \end{definizione}
% \end{frame}

%% - - - - - - - - - - - - - - - - - %%
\section{Esempi di funzioni submodulari}
%% - - - - - - - - - - - - - - - - - %%
\subsection{Weighted Set Cover}
\begin{frame} %% TODO disegno?
  \begin{definizione}[Weighted Set Cover Function]
    $f(A) = \omega(\bigcup\nolimits_{a\in A} \gamma(a)) = \omega(\gamma(A))$ con $\omega$ un vettore di pesi su $\mathbb{R}^{\gamma(V)}$
  \end{definizione}

  Si osserva che $\gamma(A \cup B) = \gamma(A) \cup \gamma(B)$, da cui segue $f(A \cup B) = \omega(\gamma(A \cup B)) = \omega(\gamma(A) \cup \gamma(B))$.

  \begin{center}
    \begin{tblr}{colspec={X[1,c]|X[4,c]},
        rowspec={Q[m]|Q[m]}
      }
      $\mathcal{I}_f(A;B)$ & $\sum\limits_{u \in U} \omega_u \cdot \min(c_u(A),c_u(B),1) $ \\
      $f(A|B)$ & $\sum\limits_{u\in U} \omega_u \cdot (1-\min(c_u(B),1)) \cdot \min(c_u(A),1) $ \\
    \end{tblr}
  \end{center}
\end{frame}
%% - - - - - - - - - - - - - - - - - %%
\subsection{Probabilistic Set Cover}
\begin{frame} %% TODO disegno
  \begin{definizione}[Probabilistic Set Cover Function]
    $f(A) = \sum\nolimits_{i \in U}\omega_i \cdot (1-\prod\nolimits_{a\in A}(1-p_{ia}))$, dove $p_{ia}$ rappresenta la probabilità che l'elemento $a \in A$ ricopra l'elemento $i \in U = \gamma(V)$ e $\omega_i > 0$.
  \end{definizione}
  Dato $P_i(A) = \prod_{a \in A}(1-p_{ia})$

  \begin{center}
    \begin{tblr}{width=\linewidth,
        colspec={X[1,c]|X[4,c]},
        rowspec={Q[m]|Q[m]}
      }
      $\mathcal{I}_f(A;B)$ & $ \sum\limits_{i \in U}\omega_i \cdot (1-(P_i(A) + P_i(B) - P_i(A \cup B))) $ \\
      $f(A|B)$ & $ \sum\limits_{i \in U} \omega_i \cdot P_i(B) \cdot (1-P_i(A \setminus B)) $ \\
    \end{tblr}
  \end{center}
  
\end{frame}
%% - - - - - - - - - - - - - - - - - %%
\subsection{Facility Location}

\begin{frame}
  \begin{definizione}[Facility Location Function]
    $f(A) = \sum\nolimits_{i\in V}\max_{a\in A}s_{ia}$, dove $s$ è la matrice di similitudine fra gli elementi in $V$
  \end{definizione}
  $s_{ii} = 1$, altrimenti ha valori inferiori a 1

  \begin{center}
    \begin{tblr}{width=\linewidth,
        colspec={X[1,c]|X[4,c]},
        rowspec={Q[m]|Q[m]}
      }
      $\mathcal{I}_f(A;B)$ & $ \sum\limits_{i\in V}\min(\max\limits_{a\in A}s_{ia}, \max\limits_{b \in B}s_{ib}) $ \\
      $f(A|B)$ & $ \sum\limits_{i\in V}\max(0,\max\limits_{a\in A}s_{ia} - \max\limits_{b\in B} s_{ib}) $ \\
    \end{tblr}
  \end{center}
\end{frame}
%% - - - - - - - - - - - - - - - - - %%
% \subsection{Generalized Graph Cut}

% \begin{frame}
%   \begin{definizione}[Generalized Graph Cut Function]
%     $f(A) = \lambda\cdot(\sum_{i\in V} \sum_{a\in A} s_{ia}) - \sum_{a_1,a_2 \in A} s_{a_1,a_2}$, con $s$ una matrice di similitudine
%   \end{definizione}
%   In modo che $f$ sia una funzione monotona submodulare si richiede che $\lambda \ge 2$

%   \begin{center}
%     \begin{tblr}{width=\linewidth,
%         colspec={X[1,c]|X[4,c]},
%         rowspec={Q[m]|Q[m]}
%       }
%       $\mathcal{I}_f(A;B)$ & $ f(A \cap B) + 2\sum\limits_{a \in A, b \in B}s_{ab}-2\sum\limits_{c\in A \cup B, d \in A \cap B}s_{cd} $ \\
%       $f(A|B)$ & $ f(A \setminus B) - 2 \sum\limits_{a' \in A \setminus B} \sum\limits_{b\in B}s_{a'b} $ \\
%     \end{tblr}
%   \end{center}

%   Se si pone $B = V \setminus A$ allora si ottiene proprio una graph cut function: $ \mathcal{I}_f(A;V \setminus A) = 2\sum\limits_{a\in A}\sum\limits_{b \in V \setminus A}s_{ab} $
% \end{frame}
%% - - - - - - - - - - - - - - - - - %%
% \subsection{Saturated Cover}
% \begin{frame}
%   \begin{definizione}[Saturated Cover Function]
%     $f(A) = \sum_{i\in V}\min(\alpha_i,\sum_{a\in A}s(i,a))$, con $s$ un kernel di similitudine come nel problema del ``facility location'' e $V = \{1,\ldots,n\}$
%   \end{definizione}

%   Sia $m_i(A) = \sum_{a\in A} s(i,a)$, cioè un ``punteggio'' di $A$ per l'elemento $i$.

%   \begin{center}
%     \begin{tblr}{width=\linewidth,
%         colspec={X[1,c]|X[4,c]},
%         rowspec={Q[m]|Q[m]}
%       }
%       $\mathcal{I}_f(A;B)$ & $ \sum\limits_{i\in V}(\min(\alpha_i,m_i(A)) + \min(\alpha_i,m_i(B)) - \min(\alpha_i,m_i(A \cup B))) $ \\
%       $f(A|B)$ & $ \sum\limits_{i \in V}\min(\alpha_i,m_i(A \cup B)) - \min(\alpha_i,m_i(B)) $ \\
%     \end{tblr}
%   \end{center}

%   Si può semplificare l'espressione di $\mathcal{I}_f(A;B)$ e si ottiene:

%   \begin{tblr}{width=\linewidth,colspec={Q[l,m]Q[l,m]},rowspec={Q[m]Q[m]}}
%     $\mathcal{I}_f(A;B)$& $= \sum\limits_{i \in V}\min(\alpha_i,m_i(A),m_i(B),m_i(A)+m_i(B)-\alpha_i)+ $\\
%     \SetCell[c=2]{r,m} $-\min(0,m_i(A \cup B)-\alpha_i)$    
%   \end{tblr}

% \end{frame}

%% - - - - - - - - - - - - - - - - - %%
\section{Problemi per le funzioni submodulari}
%% - - - - - - - - - - - - - - - - - %%
% \subsection{Minimizzazione}
% %% - - - - - - - - - - - - - - - - - %%
% \subsubsection{Minimum Set Cover}
% \begin{frame}
%   \begin{definizione}[Minimum Set Cover Problem]
%     Dato un insieme $S = \{ s_1, \ldots, s_t \}$ e un insieme $C = \{c_1, \ldots, c_k\} \subseteq \mathcal{P}(S)$ con $\forall s\in S, \exists c_j \in C $ tale che $ s \in c_j$,
%     sia una ``set cover'' dell'insieme $S$ un insieme $I \subseteq \{1, \ldots, j\}$ tale che $ \bigcup_{i \in I}c_i = S $.
%     Trovare l'insieme $I^*$ con cardinalità minima.
%   \end{definizione}

%   Il problema è NP-hard dato che si riduce a 3-dimensional matching.
% \end{frame}
% %% - - - - - - - - - - - - - - - - - %%
% \subsubsection{Submodular Set Cover}
% \begin{frame}
%   \begin{definizione}[Submodular Set Cover]
%     Data una funzione $f: \mathcal{P}(S) \to \mathbb{R}$ submodulare non decrescente, trovare il sottoinsieme di peso minimo tale che ``copra'' tutti gli elementi dell'insieme $S$. Cioè:
%     \[ I^* = \min_{I \subset S}\{ \sum_{i \in I}c_i | f(I) = f(S) \} \]
%     Con $c_i$ un peso associato all'elemento $i$ di $S$.
%   \end{definizione}
% \end{frame}
% %% - - - - - - - - - - - - - - - - - %%
% \subsubsection{Submodular Cost Submodular Cover}
% \begin{frame}
%   \begin{definizione}[Submodular Cost Submodular Cover]
%     Date due funzioni $f,g: \mathcal{P}(S) \to \mathbb{R}$ polimatroidi, trovare $I$ che minimizzi:
%     \[ \min_{I \subseteq S}\{f(I)|g(I) \ge c\} \]
%   \end{definizione}
% \end{frame}
% %% - - - - - - - - - - - - - - - - - %%

%% - - - - - - - - - - - - - - - - - %%
% \subsection{Problema dello zaino}
% \begin{frame}
%   \begin{definizione}[Problema dello zaino]
%     Dato un insieme $S = \{ s_1, \ldots, s_t\}$ e una funzione $f: S \to \mathbb{Z}^+$, un valore $b$ che è la dimensione dello zaino e un intero positivo $K$,
%     sia $I$ una partizione dell'insieme $S$ tale che $\sum_{i \in I} f(i) \le b$.
%     La soluzione del problema è quindi l'insieme $I^*$ con $\sum_{i \in I^*} f(i)$ massimo.
%   \end{definizione}
% \end{frame}

%% - - - - - - - - - - - - - - - - - %%
\subsection{Submodular Knapsack Problem}
\begin{frame}
  \begin{definizione}[Submodular Knapsack Problem]
    Data una funzione $f$ submodulare, il problema consiste nel trovare l'insieme con peso dato da $f$ più vicino al limite $b$.\\
    In particolare trovare $\max\limits_{I \subseteq S}\{ f(I) \mid f(I) \le b \} $
  \end{definizione}

  \begin{definizione}[Submodular Cost Submodular Knapsack]
    Date due funzioni $f,g: \mathcal{P}(S) \to \mathbb{R}$ polimatroidi, trovare $I$ che massimizzi:
    \[ \max_{I \subseteq S}\{g(I)|f(I) \le b\} \]
  \end{definizione}
\end{frame}

%% - - - - - - - - - - - - - - - - - %%
% \subsection{Submodular Welfare Problem}
% \begin{frame}
%
% \end{frame}

%% - - - - - - - - - - - - - - - - - %%
\section{Algoritmo Greedy}
\begin{frame}
  % Per risolvere questi problemi di massimizzazione e minimizzazione si usa sia nel caso dei matroidi che nel caso dei polimatrodi l'agoritmo greedy.
  \begin{center}
    \begin{algorithm}[H]
      \renewcommand{\thealgorithm}{``greedy''}
      \caption{}\label{alg:cap}
      \begin{algorithmic}[1]
        \State{$S^0 \gets \emptyset, V^0 \gets V, t \gets 1$}
        \While{$ t \le k $}
          \State{$ i(t) \in V^{t-1} $ tale che $ f(\{i(t)\}|S^{t-1})=\max\limits_{i \in V^{t-1}}f(\{i\}|S^{t-1}) $}

          \If{$ f(\{i(t)\}|S^{t-1}) \le 0$}
            \State{$ k^* \gets t-1\qquad $}\Comment{$ k^* < k $}
            \State{\textbf{return} {$ S^{k^*} $}}
          \ElsIf{$ f(\{i(t)\}|S^{t-1}) > 0 $}
            \State{$ S^t \gets S^{t-1} \cup \{ i(t) \} $}
            \State{$ V^t \gets V^{t-1} \setminus \{i(t)\} $}
            \State{$ t \gets t + 1$}
          \EndIf % chktex 1
        \EndWhile % chktex 1

        \State{$ k^* \gets k\qquad $}\Comment{$ k^* == k $}
        \State{\textbf{return} {$ S^{k^*} $}}
      \end{algorithmic}
    \end{algorithm}
  \end{center}
\end{frame}

\begin{frame}
  Per la massimizzazione si ottiene un'approssimazione di almeno $1-\frac{1}{e}$ rispetto al valore ottimo\\   
  L'approssimazione dell'algoritmo è ottima se si suppone che la funzione oracolo si possa valutare un numero polinomiale di volte
\end{frame}
%% - - - - - - - - - - - - - - - - - %%
\subsection{Curvatura}
\begin{frame}
  Tuttavia spesso si ottengono risultati molto più vicini all'ottimo
  \begin{definizione}[Curvatura]
    Una funzione submodulare $f: \mathcal{P}(V) \to \mathbb{R}^+$ ha curvatura $\kappa \in [0, 1]$ se $f(S \cup \{j\}) - f(S) \ge (1 - \kappa)f(\{j\}), \forall S \subset V, \forall j \in V \setminus S$\\
    cioè $\kappa_f = 1- \min_{j\in V} \frac{f(j|V\setminus \{j\})}{f(j|\emptyset)}$
  \end{definizione}
  Se $\kappa = 0$ allora la funzione è lineare\\

  Per funzioni submodulari non decrescenti si ha un'approssimazione di almeno $\frac{1-e^{-\kappa_f}}{\kappa_f}$, che tende a $1$ per $ \kappa_f \to 0 $
\end{frame}


%% - - - - - - - - - - - - - - - - - %%
% \section{Submodular Mutual Information-Based Summarization}
% \begin{frame}
%   Il problema consiste nel massimizzare $\mathcal{I}_f(A;V \setminus A)$ con $\norm{A}=k$\\
%   $\mathcal{I}_f(A;V \setminus A)$ è submodulare non monotona\\
%   Quindi l'approssimazione di almeno $1-\frac{1}{e}$ dell'algoritmo greedy non è garantita\\

%   \begin{teorema}
%     Sia $f(j)\le1, \forall j \in V$, allora $g(A) = \mathcal{I}_f(A;V \setminus A)$ è $\epsilon$-ap\-pros\-si\-ma\-ti\-va\-men\-te monotona per un $A$ con fattore $\kappa_f(A)$,\\
%     cioè $g(j|A) \ge -\kappa_f(A), \forall j \in V, A \subseteq V$, con $\kappa_f(A) = \max_{j \in V \setminus A}\frac{f(j|V \setminus (A \cup j))}{f(j)}$
%   \end{teorema}

%   Si ottiene quindi $\hat{A}$ con $\bigl|\hat{A}\bigr|=k$ tale che $ \mathcal{I}_f(\hat{A};V \setminus \hat{A}) \ge (1-\frac{1}{e})\cdot(g(A^*) - k \cdot \kappa_f(A^*))$
% \end{frame}


%% - - - - - - - - - - - - - - - - - %%
\section{Query-Based and Privacy Preserving Summarization}
%% - - - - - - - - - - - - - - - - - %%
\subsection{Query-Based Summarization}
\begin{frame}
  \textit{Ottimizzazione diretta massimizzando l'informazione mutua}\\
  Data $g$ funzione submodulare e $\lambda \in \mathbb{R}$, massimizzare l'informazione mutua fra ``query set'' $Q$ e l'insieme $A$, più un termine di correzione per diversità/rappresentazione\\
  \[ \max_{A \subseteq V, \norm{A} \le k} \mathcal{I}_f(A;Q) + \lambda \cdot g(A) \]
  In generale $\mathcal{I}_f(A;Q)$ non è submodulare, ma lo è se $f^{(3)}(i,j,k;A) \ge 0$ e $g$ è monotona submodulare
\end{frame}

\begin{frame}
  \textit{Constrained formulation usando il conditional gain}\\
  \begin{equation*}
    \begin{dcases}
      \max_{A \subseteq V}g(A) \\
      f(A|Q) \le \epsilon \\
      \norm{A} \le k
    \end{dcases}
  \end{equation*}

  $\mathcal{I}_f(A;Q) = f(A) - f(A|Q)$ allora massimizzare $\mathcal{I}_f(A;Q)$ è equivalente a minimizzare $f(A|Q)$\\
  La formulazione ammette un'approssimazione bi-criteria di $ \left[ \textcolor[RGB]{148,17,0}{ 1-\frac{1}{e}}, \textcolor[RGB]{0,84,147}{\frac{n}{1+(n-1)(1-\kappa_f)}} \right] $\\
  cioè una soluzione $ g(\hat{A})\ge (\textcolor[RGB]{148,17,0}{1-\frac{1}{e}})\cdot g(A^*) $, $f(\hat{A}|Q)\le \textcolor[RGB]{0,84,147}{\frac{n}{1+(n-1)(1-\kappa_f)}} \cdot \epsilon$ e $ \norm{\hat{A}} \le \textcolor[RGB]{0,84,147}{\frac{n}{1+(n-1)(1-\kappa_f)}} \cdot k $
\end{frame}

%% - - - - - - - - - - - - - - - - - %%
\subsection{Privacy Preserving Summarization}
\begin{frame}
  \textit{Ottimizzazione diretta minimizzando l'informazione mutua}\\
  \[ \max_{A \subseteq V, \norm{A}\le k}\lambda g(A) - \mathcal{I}_f(A;P) = \max_{A \subseteq V, \norm{A}\le k} \lambda g(A) +f(P|A) \]
  $f(P|A)$ è submodulare in $A$ soltanto se $f^{(3)}(i,j,k;A)\le 0$, quindi la massimizzazione non è trattabile nella maggior parte dei casi
\end{frame}

\begin{frame}
  \textit{Ottimizzazione diretta massimizzando il conditional gain}\\
  Invece di massimizzare $f(P|A)$ viene massimizzato $f(A|P)$\\
  \[ \max_{A\subseteq V, \norm{A}\le k} \lambda g(A) + f(A|P) \]

  L'algoritmo greedy ammette un'approssimazione di almeno $1-\frac{1}{e}$
\end{frame}

\begin{frame}
  \textit{Constrained formulation usando l'informazione mutua}\\
  \begin{equation*}
    \begin{dcases}
      \max_{A\subseteq V}g(A)\\
      \mathcal{I}_f(A;P)\le\epsilon\\
      \norm{A} \le k
    \end{dcases}
  \end{equation*}
  $\mathcal{I}_f(A;P)$ è submodulare se $f^{(3)}(i,j,k;A) \ge 0$\\
\end{frame}

%% - - - - - - - - - - - - - - - - - %%
\subsection{Joint Query e Privacy Preserving Summarization}
\begin{frame}
  \textit{Ottimizzazione diretta utilizzando l'informazione mutua}\\
  \[ \max_{A\subseteq V, \norm{A}\le k} \mathcal{I}_f(A;Q) - \mathcal{I}_f(A;P) + \lambda g(A) \]
  Tuttavia non è adeguata quando si pone $Q=V$ o $Q=\emptyset$\\
  \begin{center}    
    \begin{tblr}{Q[r,m]Q[l,m]}
      $Q=V$ & $f(A)+\lambda g(A) - \mathcal{I}_f(A;P)$\\
      $Q=\emptyset$ & $ \lambda g(A) - \mathcal{I}_f(A;P)$
    \end{tblr}
  \end{center}
\end{frame}

\begin{frame}
  \textit{Ottimizzazione diretta sommando i termini $Q$ e $P$}\\
  \begin{equation*}
    \max_{A\subseteq V, \norm{A}\le k} \lambda_1 \mathcal{I}_f(A;Q) + \lambda_2 f(A|P) + g(A)
  \end{equation*}
  Si ha quindi un'approssimazione di almeno $1-\frac{1}{e}$\\
\end{frame}

\begin{frame}
  \textit{Constrained formulation combinando $Q$ e $P$}\\
  \begin{equation*}
    \begin{cases}
      \max\limits_{A\subseteq V} g(A) \\
      f(A|Q) \le \epsilon_1 \\
      \mathcal{I}_f(A;P) \le \epsilon_2 \\
      \norm{A} \le k \\
    \end{cases}
  \end{equation*}
  Tuttavia si necessita che $\mathcal{I}_f(A;P)$ sia submodulare, quindi che $f^{(3)} \ge 0$
\end{frame}

\begin{frame}
  Si può quindi riformulare usando $f(A|P)$ invece che $\mathcal{I}_f(A;P)$\\
  \begin{equation*}
    \begin{cases}
      \max_{A\subseteq V}g(A) + \lambda_2 f(A|P)\\
      f(A|Q) \le \epsilon_1 \\
      \norm{A} \le k \\
    \end{cases}
  \end{equation*}
  Risulta quindi un'istanza del Submodular Cost Submodular Knapsack Problem
\end{frame}

\begin{frame}
  \textit{Ottimizzazione diretta con una funzione obbiettivo comune per i termini $P$ e $Q$}\\
  \begin{equation*}
    \max_{A\subseteq V, \norm{A}\le k} \mathcal{I}_f(A;Q|P) + \lambda g(A)
  \end{equation*}
  $\mathcal{I}_f(A;Q|P) = \mathcal{I}_f(A;Q) - \mathcal{I}_f(A;Q;P)$ quindi si massimizza la somiglianza con $Q$ e si minimizza la somiglianza con $P$\\
  Si ha quindi un'approssimazione di almeno $1-\frac{1}{e}$ se $f^{(3)}(i,j,k;A)\ge 0$
  \begin{center}
    \begin{tblr}{Q[r,m]Q[l,m]}
      se $Q=V$ & $f(A|P)+\lambda g(A)$\\
      se $P=\emptyset$ & $\mathcal{I}_f(A;Q)+\lambda g(A)$\\
      se $Q=V, P=\emptyset$ & $f(A) +\lambda g(A)$\\
    \end{tblr}
  \end{center}

\end{frame}

\begin{frame}

\end{frame}

%% - - - - - - - - - - - - - - - - - %%
% \section{Clustering and Partitioning using the Multi-Set Mutual Information}
%% - - - - - - - - - - - - - - - - - %%
\hiddensection{Minimization of Submodular Information Metric}
\hiddensubsection{$D_f(A;B)$}
\begin{frame}
  \begin{definizione}[Variation of Information]
    Data $f: \mathcal{P}(V) \to \mathbb{R}$, sia $D_f(A;B) = f(A \cup B) - \mathcal{I}_f(A;B) = f(A|B) + f(B|A)$
  \end{definizione}

  Tuttavia $D_f(A;B)$ è una metrica soltanto se $f$ ha curvatura $\kappa_f = 1 - \min_{j\in V} \frac{f(j|V\setminus \{j\})}{f(j|\emptyset)}$ minore di $1$\\
  Se la curvatura è $0$ allora è proprio uguale alla distanza di Hamming

  \begin{definizione}[Distanza Submodulare di Hamming]
    Data $f: \mathcal{P}(V) \to \mathbb{R}$, sia $D^{\text{SH}}_f(A;B) = f((A \setminus B) \cup (B \setminus A))$
  \end{definizione}
  
  Di cui si ha un'approssimazione additiva
  \begin{definizione}
    Data $f: \mathcal{P}(V) \to \mathbb{R}$, sia $D^{\text{SHA}}_f(A;B) = f(A \setminus B) + f(B \setminus A)$
  \end{definizione}
\end{frame}

\hiddensubsection{Minimization}
\begin{frame}
  Minimizzare la distanza fra $A \in \mathcal{P}(V)$ e $S_1, S_2, \ldots, S_m$:
  \[ \min\limits_{A \subseteq V} \sum_{i=1}^m D_f(A,S_i) \]
  il problema è simile a trovare un insieme rappresentativo con una metrica submodulare di Hamming dato che è un'approssimazione valida di $D_f(A,S)$\\
  $D^{\text{SHA}}_f(A,S)$ è submodulare in $A$ per $S$ fisso\\
  Si trova una soluzione in tempo polinomiale con approssimazione di $1-\kappa_f$
\end{frame}


\hiddensection{Domande}
\begin{frame}
% left empty
\end{frame}

\begin{frame}
  Se otteniamo $S^k$ dall'algoritmo greedy allora:
  \[ f(S^k) \ge (1-\frac{1}{e}) \cdot f(O) \]
  \begin{dimostrazione}
    \[ f(S^{i+1}) - f(S^i) \ge \frac{1}{k}(f(O) - f(S^i)), \quad \forall\, 0 \le i < k \]
    a ogni passaggio l'incremento non è troppo piccolo\\
    perché se $O = \{o_1, \ldots, o_k\}$ per ogni $i$ si ha monotonicità:
  \end{dimostrazione}
\end{frame}

\begin{frame}
  \begin{dimostrazione}
    \begin{equation*}
      \begin{aligned}
        f(O) &\le f(O \cup S^i)\\
        &= f(S^i) + \sum_{j=1}^{k}\left( f(S^i \cup \{ o_1, \ldots, o_j\}) - f(S^i \cup \{o_1,\ldots, o_{j-1}\}) \right)\\
        &= f(S^i) + \sum_{j=1}^{k}\left( f(\{o_j\}|S^i \cup \{o_1,\ldots, o_{j-1}\}) \right)\\
        &\le f(S^i) + \sum_{j=1}^{k}\left( f(\{o_j\}|S^i) \right) \\
        &\le f(S^i) + k\cdot \left( f(S^{i+1}) - f(S^i) \right)
      \end{aligned}
    \end{equation*}
  \end{dimostrazione}
\end{frame}
\begin{frame}
  \begin{dimostrazione}
    cioè:
    \[ \left(f(O)-f(S^i)\right) - \left(f(O)-f(S^{i+1})\right) \ge \frac{1}{k}\left(f(O)-f(S^i)\right) \]
    \[ f(O)-f(S^{i+1}) \le \left( 1 -\frac{1}{k} \right)(f(O)-f(S^i)) \]
    per induzione:
    \[ f(O)-f(S^{k}) \le {\left( 1 -\frac{1}{k} \right)}^{k}(f(O)-f(S^0)) \le \frac{1}{e} f(O) \]
  \end{dimostrazione}
\end{frame}

\end{document}