fixed some latex warnings

This commit is contained in:
elvis
2023-08-29 15:34:45 +02:00
parent 0476329279
commit 00e921f219
2 changed files with 42 additions and 39 deletions

Binary file not shown.

View File

@ -15,7 +15,7 @@
\geometry{a4paper} \geometry{a4paper}
\usepackage[utf8]{inputenc} %% use UTF-8, maybe not needed since 2018 \usepackage[utf8]{inputenc} %% use UTF-8, maybe not needed since 2018
\usepackage[italian,main=english]{babel} %% language \usepackage[english]{babel} %% language
\pagestyle{headings} \pagestyle{headings}
@ -175,8 +175,7 @@
\usetikzlibrary{calc} \usetikzlibrary{calc}
\usepgfplotslibrary{groupplots} \usepgfplotslibrary{groupplots}
\usepackage[% \usepackage[
binary-units=true,
prefixes-as-symbols=false, prefixes-as-symbols=false,
]{siunitx} ]{siunitx}
@ -205,9 +204,9 @@ prefixes-as-symbols=false,
\pgfplotstableread[col sep=comma]{#1}{\table} \pgfplotstableread[col sep=comma]{#1}{\table}
\pgfplotstablegetcolsof{\table} \pgfplotstablegetcolsof{\table}
\pgfmathtruncatemacro\numberofcols{\pgfplotsretval-1} \pgfmathtruncatemacro\numberofcols{\pgfplotsretval-1}
\pgfplotsinvokeforeach{1,...,\numberofcols}{ \pgfplotsinvokeforeach{1,...,\numberofcols}{ % chktex 11
\pgfplotstablegetcolumnnamebyindex{##1}\of{\table}\to{\colname} \pgfplotstablegetcolumnnamebyindex{##1}\of{\table}\to{\colname}
\addplot table [y index=##1] {\table}; \addplot table [y index=##1] {\table}; % chktex 1
\addlegendentryexpanded{\colname} \addlegendentryexpanded{\colname}
} }
\addplot[mark=none, black, samples=2, domain=0:64] {1}; \addplot[mark=none, black, samples=2, domain=0:64] {1};
@ -322,31 +321,31 @@ The structure of the implementation with native C++ threads is as follows:
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{stdthread}{$Input,Output$} \Procedure{stdthread}{$Input,Output$}
\For{$result \in Input$} \For{$result \in Input$}
\State $arena = result$ \State{$arena = result$}
\While{$iter>0$} \While{$iter>0$}
\For{$thread \in ThreadPool$} \For{$thread \in ThreadPool$}
\State send a new LAMBDA with appropriate bounds to the threadpool \State{send a new LAMBDA with appropriate bounds to the threadpool}
\EndFor \EndFor{}
\State swap $arena$ with $result$ \State{swap $arena$ with $result$}
\State $iter = iter - 1$ \State{$iter = iter - 1$}
\EndWhile \EndWhile{}
\State wait for the threadpool to finish \State{wait for the threadpool to finish}
\State append $result$ to $Output$ \State{append $result$ to $Output$}
\EndFor \EndFor{}
\EndProcedure \EndProcedure{}
\end{algorithmic} \end{algorithmic}
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{lambda}{$l, \Delta$}\Comment \textit{$l$ is the index of block of rows, $\Delta$ is the number of rows} \Procedure{lambda}{$l, \Delta$}\Comment{\textit{$l$ is the index of block of rows, $\Delta$ is the number of rows}}
\For{$x \in \{l \cdot \Delta, \ldots, (l+1) \cdot \Delta - 1\}$} \For{$x \in \{l \cdot \Delta, \ldots, (l+1) \cdot \Delta - 1\}$}
\For{$y \in \{0, \ldots, Columns\}$} \For{$y \in \{0, \ldots, Columns\}$}
\If{$(x, y)$ not in the border} \If{$(x, y)$ not in the border}
\State calculate the neighborhood of $(x, y)$ \State{calculate the neighborhood of $(x, y)$}
\State $arena[x][y] = Stencil(neighborhood)$ \State{$arena[x][y] = Stencil(neighborhood)$}
\EndIf \EndIf{}
\EndFor \EndFor{}
\EndFor \EndFor{}
\EndProcedure \EndProcedure{}
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
@ -364,30 +363,31 @@ Since it is required for all jobs to finish, a condition variable is used to wak
The structure of the implementation using FastFlow is similar to the one with native threads. The structure of the implementation using FastFlow is similar to the one with native threads.
Since the \texttt{Stencil} class is a subclass of \texttt{ff\_Map}, the method used for the execution is \texttt{parallel\_for}. Since the \texttt{Stencil} class is a subclass of \texttt{ff\_Map}, the method used for the execution is \texttt{parallel\_for}.
A custom emitter and collector would not have been faster and so the simpler approach of inheriting the methods from \texttt{ff\_Map} was chosen. A custom emitter and collector would not have been significantly faster and so the simpler approach of inheriting the methods from \texttt{ff\_Map} was chosen.
A custom emitter would have had to split the range into as many blocks as there are workers, and the custom collector would have had to function as a barrier for all workers.
\begin{algorithm}[H] \begin{algorithm}[H]
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{fastflow}{$Task$} \Procedure{fastflow}{$Task$}
\State $arena = Task$ \State{$arena = Task$}
\While{$iter>0$} \While{$iter>0$}
\State \texttt{parallel\_for} with LAMBDA as the function to execute \State{\texttt{parallel\_for} with LAMBDA as the function to execute}
\State swap $arena$ with $Task$ \State{swap $arena$ with $Task$}
\State $iter = iter - 1$ \State{$iter = iter - 1$}
\EndWhile \EndWhile{}
\State return $Task$ \State{return $Task$}
\EndProcedure \EndProcedure{}
\end{algorithmic} \end{algorithmic}
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{lambda}{$x$} \Procedure{lambda}{$x$}
\For{$y \in \{0, \ldots, Columns\}$} \For{$y \in \{0, \ldots, Columns\}$}
\If{$(x, y)$ not in the border} \If{$(x, y)$ not in the border}
\State calculate the neighborhood of $(x, y)$ \State{calculate the neighborhood of $(x, y)$}
\State $arena[x][y] = Stencil(neighborhood)$ \State{$arena[x][y] = Stencil(neighborhood)$}
\EndIf \EndIf{}
\EndFor \EndFor{}
\EndProcedure \EndProcedure{}
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
@ -407,11 +407,14 @@ Since
and the value of $T_{\texttt{Reader}} + T_{\texttt{Writer}}$ is known on average, the values of speedup, scalability and efficiency are calculated as follows: and the value of $T_{\texttt{Reader}} + T_{\texttt{Writer}}$ is known on average, the values of speedup, scalability and efficiency are calculated as follows:
\begingroup
\addtolength{\jot}{1em}
\begin{align*} \begin{align*}
\text{Speedup}(n) &= \frac{T_{\text{seq}}}{T_{\text{par}}(n) - (T_{\texttt{Reader}} + T_{\texttt{Writer}})} \\ \text{Speedup}(n) &= \frac{T_{\text{seq}} - (T_{\texttt{Reader}} + T_{\texttt{Writer}})}{T_{\text{par}}(n) - (T_{\texttt{Reader}} + T_{\texttt{Writer}})} \\
\text{Scalability}(n) &= \frac{T_{\text{par}}(1) - (T_{\texttt{Reader}} + T_{\texttt{Writer}})}{T_{\text{par}}(n) - (T_{\texttt{Reader}} + T_{\texttt{Writer}})} \\ \text{Scalability}(n) &= \frac{T_{\text{par}}(1) - (T_{\texttt{Reader}} + T_{\texttt{Writer}})}{T_{\text{par}}(n) - (T_{\texttt{Reader}} + T_{\texttt{Writer}})} \\
\text{Efficiency}(n) &= \frac{\text{Speedup}(n)}{n} \\ \text{Efficiency}(n) &= \frac{\text{Speedup}(n)}{n}
\end{align*} \end{align*}
\endgroup
For very small matrices the efficiency, the speedup and the scalability are very poor for both versions. For very small matrices the efficiency, the speedup and the scalability are very poor for both versions.
For larger examples instead a significant speedup is seen, but the implementation using native threads is slightly faster. For larger examples instead a significant speedup is seen, but the implementation using native threads is slightly faster.
@ -457,7 +460,7 @@ the fastflow has a peak of speedup and scalability when using 4 workers in the s
\end{center} \end{center}
The file \texttt{random400x2500} % chktex 29 The file \texttt{random400x2500} % chktex 29
performs best with 16 workers in the Fastflow implementation and slightly better at 64 workers compared to 32 workers in terms of speedup and scalability but has a significant drop in efficiency from $0.361$ to $0.184$. The relationship between number of workers and speedup is close to linear up to 8 workers. performs best with 16 workers in the Fastflow implementation and in the native thread implementation performs slightly better at 64 workers compared to 32 workers in terms of speedup and scalability, but has a significant drop in efficiency from $0.361$ to $0.184$. The relationship between number of workers and speedup is close to linear up to 8 workers.
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}