First draft report, removing exercises

This commit is contained in:
elvis
2025-01-15 00:09:58 +01:00
parent 9b48cc005f
commit 11adaa5103
11 changed files with 624 additions and 294 deletions

View File

@ -1,15 +1,3 @@
(executable
(name main)
(public_name main)
(libraries exercises
miniImp
miniFun
analysis
utility)
(package miniImp)
(modes byte exe)
)
(executable (executable
(name miniFunInterpreter) (name miniFunInterpreter)
(public_name miniFunInterpreter) (public_name miniFunInterpreter)

View File

@ -1,79 +0,0 @@
open MiniImp
(* Wraps [s] between the escape sequence "\027[31m" and the reset
   sequence "\027[0m". *)
let colorred s =
  String.concat "" ["\027[31m"; s; "\027[0m"]
(* Ad-hoc driver: pushes one hard-coded MiniImp program through the whole
   pipeline visible below (parse -> CfgImp -> CfgRISC -> DefinedVariables ->
   ReduceRegisters -> RISC -> RISCSemantics) and prints the intermediate
   results it has not commented out. *)
let () =
(* The program under test, embedded as source text. *)
let program = "
def main with input in output out as
out := in;
a := 1;
b := 2;
c := a + a;
"
in
(* Printf.printf "%s\n%s\n" (colorred "Program is") program; *)
(* Lex and parse a source string with the generated Lexer/Parser. *)
let get_result x = Lexing.from_string x |> Parser.prg Lexer.lex in
let p = get_result program in
(* Format.printf "%s\n%a\n@?" (colorred "AST is") Types.pp_p_exp p; *)
(* NOTE(review): 10 is presumably the input value given to the program;
   confirm against CfgImp.convert_io. *)
let convertedcfg = CfgImp.convert_io 10 p in
(* Printf.printf "%s\n%a" (colorred "Converted CFG is") CfgImp.SSCfg.pp convertedcfg; *)
let convertedrisccfg = CfgRISC.convert convertedcfg in
Printf.printf "%s\n%a" (colorred "Converted RISC CFG is") CfgRISC.RISCCfg.pp convertedrisccfg;
(* ---------------------------------- *)
(* Run the defined-variables analysis, then take the CFG back out of the
   analysis structure (shadowing the previous [convertedrisccfg]). *)
let analysiscfg = DefinedVariables.compute_defined_variables convertedrisccfg in
(* Printf.printf "%s\n%a" (colorred "Analysis CFG is") DefinedVariables.DVCfg.pp analysiscfg; *)
(* Printf.printf "%s" (colorred "Undefined Variables are:"); *)
(* ( *)
(* match DefinedVariables.check_undefined_variables analysiscfg with *)
(* | None -> Printf.printf " none"; *)
(* | Some l -> Printf.printf " %a" DefinedVariables.Variable.pplist l; *)
(* ); *)
(* Printf.printf "\n"; *)
let convertedrisccfg = DefinedVariables.compute_cfg analysiscfg in
(* Printf.printf "%s\n%a" (colorred "Converted RISC after analysis CFG is") CfgRISC.RISCCfg.pp convertedrisccfg; *)
(* let analysiscfg = LiveVariables.compute_live_variables convertedrisccfg in *)
(* Printf.printf "%s\n%a" (colorred "Live Analysis CFG is") LiveVariables.DVCfg.pp analysiscfg; *)
(* let convertedrisccfg = LiveVariables.compute_cfg analysiscfg in *)
(* Printf.printf "%s\n%a" (colorred "Converted RISC with no analysis CFG is") CfgRISC.RISCCfg.pp convertedrisccfg; *)
(* let convertedrisccfg = LiveVariables.compute_cfg (LiveVariables.optimize_cfg analysiscfg) in *)
(* Printf.printf "%s\n%a" (colorred "Converted RISC after analysis CFG is") CfgRISC.RISCCfg.pp convertedrisccfg; *)
(* NOTE(review): 4 is presumably the number of registers to reduce to;
   confirm against ReduceRegisters.reduceregisters. *)
let convertedrisccfg = ReduceRegisters.reduceregisters 4 convertedrisccfg in
Printf.printf "%s\n%a" (colorred "Converted RISC after reducing registers CFG is") CfgRISC.RISCCfg.pp convertedrisccfg;
(* ---------------------------------- *)
(* Convert the RISC CFG to RISC code, print it, then evaluate it; the
   result is printed with %d, i.e. as an int. *)
let risc = RISC.convert convertedrisccfg in
Printf.printf "%s\n%a" (colorred "RISC code is") RISC.RISCAssembly.pp risc;
let computerisc = RISCSemantics.reduce risc in
Printf.printf "%s\n%d\n" (colorred "Output of RISC code is") computerisc;
()

View File

@ -1,25 +0,0 @@
def main with input n output result as
if (n % 2) == 0 then result := 1
else (
result := 0;
s := 0;
while (0 == ((n - 1) / (2 ^ s)) % 2) do (
s := s + 1
);
d := ((n - 1) / 2 ^ s);
for (i := 20, i > 0, i := i - 1) do (
a := rand(n - 4) + 2;
x := powmod(a, d, n);
y := 0;
for (j := 0, j < s, j := j+1) do (
y := powmod(x, 2, n);
if (y == 1 && (not x == 1) && (not x == n - 1)) then
result := 1;
else
skip;
x := y;
);
if not y == 1 then result := 1;
else skip;
)
)

View File

@ -21,7 +21,3 @@
(package (package
(name miniFun) (name miniFun)
(depends ocaml dune utility)) (depends ocaml dune utility))
(package
(name exercises)
(depends ocaml dune))

View File

@ -1,5 +0,0 @@
(library
(name exercises)
(public_name exercises))
(include_subdirs qualified)

View File

@ -1,109 +0,0 @@
(* Arithmetic expressions. [Of_bool] embeds a boolean expression. *)
type a_exp =
  | Aval of int
  | Plus of a_exp * a_exp
  | Minus of a_exp * a_exp
  | Times of a_exp * a_exp
  | Of_bool of b_exp

(* Boolean expressions. [Minor (x, y)] compares two arithmetic
   expressions with the strict "less than" relation. *)
and b_exp =
  | Bval of bool
  | And of b_exp * b_exp
  | Or of b_exp * b_exp
  | Not of b_exp
  | Minor of a_exp * a_exp

(* [eval_a_exp e] evaluates [e] to an integer; an embedded boolean
   counts as 1 when true and 0 when false. *)
let rec eval_a_exp = function
  | Aval n -> n
  | Plus (lhs, rhs) -> eval_a_exp lhs + eval_a_exp rhs
  | Minus (lhs, rhs) -> eval_a_exp lhs - eval_a_exp rhs
  | Times (lhs, rhs) -> eval_a_exp lhs * eval_a_exp rhs
  | Of_bool cond -> if eval_b_exp cond then 1 else 0

(* [eval_b_exp e] evaluates [e] to a boolean; [And] and [Or] use the
   short-circuiting operators. *)
and eval_b_exp = function
  | Bval value -> value
  | And (lhs, rhs) -> eval_b_exp lhs && eval_b_exp rhs
  | Or (lhs, rhs) -> eval_b_exp lhs || eval_b_exp rhs
  | Not inner -> not (eval_b_exp inner)
  | Minor (lhs, rhs) -> eval_a_exp lhs < eval_a_exp rhs
type 'a my_tree =
Leaf of 'a
| Node of ('a my_tree) list
(* [mod_list y] groups consecutive equal elements of [y] into sublists,
   keeping the original order, e.g. [1; 1; 2] becomes [[1; 1]; [2]]. *)
let mod_list y =
  let push groups x =
    match groups with
    (* [x] continues the run currently at the front of the accumulator. *)
    | (h :: _ as run) :: rest when h = x -> (x :: run) :: rest
    (* Otherwise [x] starts a new run. *)
    | _ -> [x] :: groups
  in
  (* Runs are accumulated newest-first, hence the final reversal. *)
  List.rev (List.fold_left push [] y)
(* -------------------------------------------------------------------------- *)
(* [to_tup f g] is the function on pairs that applies [f] to the first
   component and [g] to the second. *)
let to_tup f g (a, b) = (f a, g b)
(* [partialsum l] returns the running sums of [l]: the i-th result is the
   sum of the first i elements, e.g. [1; 2; 3] becomes [1; 3; 6]. *)
let partialsum l =
  let step (total, sums_rev) x =
    let total = total + x in
    (total, total :: sums_rev)
  in
  let _, sums_rev = List.fold_left step (0, []) l in
  (* Sums were accumulated newest-first. *)
  List.rev sums_rev
type label = string

(* A state of an automaton over symbols of type ['a]:
   - [l]: the state's label
   - [next]: outgoing transitions, each a target state paired with the
     list of symbols that select it
   - [final]: whether the state is accepting *)
type 'a finite_state_automata = {
  l: label;
  next: ('a finite_state_automata * 'a list) list;
  final: bool;
}

(* [check_included input fsa] walks the automaton from state [fsa],
   consuming [input] one symbol at a time. It returns [true] iff every
   symbol has a matching transition and the state reached at the end is
   final. When several transitions match a symbol, the first one in
   [next] is taken. *)
let rec check_included input fsa =
  match input with
  | [] -> fsa.final
  | symbol :: remaining ->
    let accepts (_, symbols) = List.mem symbol symbols in
    (match List.find_opt accepts fsa.next with
     | Some (target, _) -> check_included remaining target
     | None -> false)
(* -------------------------------------------------------------------------- *)
module StringMap = Map.Make(String)

(* Automaton over characters, with states identified by name:
   - [vertices]: maps a state name to the boolean returned when the input
     ends in that state
   - [edges]: maps a state name to its outgoing edge, given as
     (target state name, symbol); map keys are unique, so a state has at
     most one outgoing edge
   - [state]: name of the state the run starts from *)
type fsa = {
  vertices: bool StringMap.t;
  edges: (string * char) StringMap.t;
  state: string;
}

(* [ex8 instr infsa] runs [infsa] on [instr], starting from [infsa.state].
   Returns the boolean stored in [vertices] for the state reached once the
   input is exhausted. Returns [false] if that state is not in [vertices],
   or if some symbol has no matching edge from the current state. *)
let ex8 (instr: char list) (infsa: fsa) =
  let rec run (pending: char list) (current: string) =
    match pending with
    | [] -> (
        match StringMap.find_opt current infsa.vertices with
        | None -> false
        | Some accepting -> accepting
      )
    | symbol :: rest -> (
        (* Keys are unique, so a direct lookup finds the only candidate
           edge; no need to filter the whole map for every symbol. *)
        match StringMap.find_opt current infsa.edges with
        | Some (target, edge_symbol) when Char.equal edge_symbol symbol ->
          run rest target
        | Some _ | None -> false
      )
  in
  run instr infsa.state
(* Binary tree with integers stored only at the leaves. *)
type binary_tree =
  | Node of binary_tree * binary_tree
  | Leaf of int

(* [ex9 b] returns the sum of all leaf values of [b]. *)
let ex9 b =
  (* [sum_into acc t] adds every leaf of [t] to [acc]; the second subtree
     of a node is folded in before the first. *)
  let rec sum_into acc = function
    | Leaf value -> value + acc
    | Node (first, second) -> sum_into (sum_into acc second) first
  in
  sum_into 0 b
(* -------------------------------------------------------------------------- *)

View File

@ -1,60 +0,0 @@
type a_exp =
Aval of int
| Plus of a_exp * a_exp
| Minus of a_exp * a_exp
| Times of a_exp * a_exp
| Of_bool of b_exp
and b_exp =
Bval of bool
| And of b_exp * b_exp
| Or of b_exp * b_exp
| Not of b_exp
| Minor of a_exp * a_exp
val eval_a_exp: a_exp -> int
val eval_b_exp: b_exp -> bool
type 'a my_tree =
Leaf of 'a
| Node of ('a my_tree) list
val mod_list: 'a list -> 'a list list
(* --------------------------------------------------------------------------- *)
val to_tup : ('a -> 'b) -> ('c -> 'd) -> (('a * 'c) -> ('b * 'd))
val partialsum : int list -> int list
type label =
string
type 'a finite_state_automata = {
l: label;
next: ('a finite_state_automata * 'a list) list;
final: bool;
}
val check_included : 'a list -> 'a finite_state_automata -> bool
(* -------------------------------------------------------------------------- *)
module StringMap : Map.S with type key = string
type fsa = {
vertices: bool StringMap.t;
edges: (string * char) StringMap.t;
state: string;
}
val ex8 : char list -> fsa -> bool
type binary_tree =
Node of binary_tree * binary_tree
| Leaf of int
val ex9 : binary_tree -> int
(* -------------------------------------------------------------------------- *)

0
report/document.bib Normal file
View File

BIN
report/document.pdf Normal file

Binary file not shown.

199
report/document.tex Normal file
View File

@ -0,0 +1,199 @@
\documentclass[12pt, oneside]{article}
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%% Load Packages %%
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
\usepackage[
top=2cm,
bottom=2cm,
left=2cm,
right=2cm,
headheight=20pt,
centering
]{geometry}
\geometry{a4paper}
\usepackage[utf8]{inputenc} %% use UTF-8, maybe not needed since 2018
\usepackage[italian,main=english]{babel} %% language
\pagestyle{headings}
\usepackage{scrlayer-scrpage}
\usepackage{csquotes} %% correct language also for citations
\ifoot[]{}
\cfoot[]{}
\ofoot[\pagemark]{\pagemark}
\pagestyle{scrplain}
\usepackage[
backend=biber,
style=numeric,
sorting=ynt
]{biblatex} %% for citations
\addbibresource{document.bib}
\usepackage{import} %% specify path for import
%% math packages
\usepackage{graphicx} %% for pictures
\usepackage{float}
\usepackage{amssymb} %% math symbols
\usepackage{amsmath} %% math matrix etc
\usepackage{listings} %% code block
\usepackage{tabularray} %% better tables
\usepackage{booktabs} %% rules for tables
\usepackage{mathrsfs}
\usepackage{mathtools}
\usepackage{algorithm} %% for algorithms
\usepackage{algpseudocode} %% loads algorithmicx
\usepackage{amsthm}
\usepackage{thmtools} %% theorems
\usepackage{syntax} %% BNF
\usepackage{semantic} %% semantics
%% plot packages
\usepackage{pgfplots} %% plots used with \begin{tikzpicture}
\usepackage{tikz} %% for pictures
\usetikzlibrary{trees,chains,shadows.blur,fit}
\pgfplotsset{width=10cm,compat=newest}
%% design packages
\usepackage{enumitem} %% for lists and enumerating
\usepackage{color}
\usepackage{xcolor,colortbl} % xcolor for defining colors, colortbl for table colors
\usepackage{makecell} %% for multiple lines in cell of table
\usepackage{cancel}
\usepackage{pgfornament} %% ornaments
%% load last
\usepackage[hidelinks]{hyperref} %% links for table of contents, load last
\usepackage{bookmark} %% for better table of contents
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%% Configuration of the packages %%
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%% \linespread{1}
\raggedbottom %% spaces if page is empty % chktex 1
%% set max table of contents recursion to subsection (3->subsubsection)
\setcounter{tocdepth}{3}
\setcounter{secnumdepth}{3}
%% use bar instead of arrow for vectors
\renewcommand{\vec}[1]{\bar{#1}}
%% easy norm
\newcommand{\norm}[1]{\left\lvert#1\right\rvert}
% argmin and argmax
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator*{\argmin}{argmin}
%% itemize use less vertical space (use olditemize for default behaviour)
\let\olditemize=\itemize%% old itemize
\let\endolditemize=\enditemize%% old end itemize
\renewenvironment{itemize}{\olditemize\itemsep-0.2em}{\endolditemize}
%% items in itemize emph+box
%% usage: \ieb{Class:} for simple item
%% \ieb[4cm]{Class:} for specific size of box
\newcommand{\ieb}[2][2cm]{
\makebox[#1][l]{\emph{#2}}
} %% TODO: replace with description environment (? maybe)
% less vertical space around align & align*
\newcommand{\zerodisplayskips}{
\setlength{\abovedisplayskip}{0pt}
\setlength{\belowdisplayskip}{0pt}
\setlength{\abovedisplayshortskip}{0pt}
\setlength{\belowdisplayshortskip}{0pt}
}
% make dotfill use all the space available
\renewcommand{\dotfill}{
\leavevmode\cleaders\hbox to 1.00em{\hss .\hss }\hfill\kern0pt } % chktex 1 chktex 26
\setlength{\fboxsep}{-\fboxrule} % for debugging
%% PACKAGE algorithm
\floatname{algorithm}{Algorithm}
%% PACKAGE tabularray
\UseTblrLibrary{amsmath}
%% PACKAGE color
\definecolor{red}{rgb}{1, 0.1, 0.1}
\definecolor{lightgreen}{rgb}{0.55, 0.87, 0.47}
\definecolor{gray}{rgb}{0.3, 0.3, 0.3}
\newcommand{\lgt}{\cellcolor{lightgreen}} %% light green in tables
\newcommand{\gry}{\textcolor{gray}} %% gray text
\newcommand{\rd}{\textcolor{red}} %% red text
%% PACKAGE minipage
\newcommand{\thend}[1]{\begin{center}
\begin{minipage}[c][1em][c]{#1}
\dotfill{}
\end{minipage}
\end{center}}
%% PACKAGE thmtools
%% ......................................................................... %%
%% local changes
% \setcounter{secnumdepth}{0}
\newcommand{\defeq}{\vcentcolon=}
\lstdefinelanguage{miniimp}{
keywords={if, then, else, skip, while, do, for, rand},
keywordstyle=\color{blue}\bfseries,
identifierstyle=\color{black},
sensitive=false,
morecomment=[s]{(*}{*)}, % chktex 9
commentstyle=\color{gray}\ttfamily,
stringstyle=\color{red}\ttfamily,
escapeinside={£}{£},
numbers=left,
stepnumber=1
}
\lstset{
language=miniimp,
extendedchars=true,
basicstyle=\footnotesize\ttfamily,
showstringspaces=false,
showspaces=false,
tabsize=2,
breaklines=true,
showtabs=false
}
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
\title{Document}
\author{
Elvis Rossi
}
\date{\today}
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
\begin{document}
\input{report}
\end{document}
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%%% Local Variables:
%%% TeX-command-extra-options: "-shell-escape"
%%% End:

425
report/report.tex Normal file
View File

@ -0,0 +1,425 @@
\begin{section}{Semantics}
\begin{subsection}{MiniImp}
The semantics of the MiniImp language are implemented in the files \href{../lib/miniImp/Semantics.mli}{Semantics.mli} and \href{../lib/miniImp/Semantics.ml}{Semantics.ml}.
A \texttt{reduce} function is provided that transforms an AST into the evaluated value or an error.
The AST type is defined in \href{../lib/miniImp/Types.mli}{Types.mli} and in \href{../lib/miniImp/Types.ml}{Types.ml}.
A program \texttt{p} is defined as follows:
\begin{grammar}
<p> \(\defeq\) `def main with input' <x> `output' <y> as <c>
<c> \(\defeq\) skip
\alt{} <x> `:=' <a>
\alt{} <c> `;' <c>
\alt{} `if' <b> `then' <c> `else' <c>
\alt{} `while' <b> `do' <c>
\alt{} `for' `(' <c> `,' <b> `,' <c> `)' `do' <c>
<b> \(\defeq\) <v> | <b> `\&\&' <b> | <b> `||' <b> | `not' <b>
\alt{} <a> `<' <a> | <a> `<=' <a> | <a> `>' <a> | <a> `>=' <a>
\alt{} <a> `==' <a>
<a> \(\defeq\) <x> | <n> | <a> `+' <a> | <a> `-' <a> | <a> `*' <a> | <a> `/' <a>
\alt{} <a> `\%' <a> | <a> `^' <a> | `powmod' `(' <a> `,' <a> `,' <a> `)' | `rand' `(' <a> `)'
\end{grammar}
Where \texttt{\%} is the modulo operator and \texttt{powmod} is the modular exponentiation (powermod) operator;
the variables are all integers, \texttt{n} is an integer and \texttt{v} is a boolean literal.
The semantics of the additional arithmetic expressions are implemented in the same manner as the others.
The semantics of \texttt{for} is as follows:
\begin{center}
\inference[\texttt{for}]
{\langle\sigma, c_1\rangle \to_c \sigma_1 & \langle\sigma_1, \texttt{while } b \texttt{ do } c_3 \texttt{; } c_2 \rangle \to_c \sigma_2} % chktex 1
{\langle\sigma, \texttt{for} \texttt{(} c_1 \texttt{, } b \texttt{, } c_2 \texttt{)} \texttt{ do } c_3 \rangle \to_c \sigma_2} % chktex 1 chktex 9
\end{center}
but the implementation exploits the structure and doesn't simply rewrite the for loop as a while loop.
\end{subsection}
\begin{subsection}{MiniFun Semantics}
The semantics of the MiniFun language are implemented in the files \href{../lib/miniFun/Semantics.mli}{Semantics.mli} and \href{../lib/miniFun/Semantics.ml}{Semantics.ml}.
A \texttt{reduce} function is provided that transforms the AST into the evaluated value or an error.
The AST type is defined in \href{../lib/miniFun/Types.mli}{Types.mli} and in \href{../lib/miniFun/Types.ml}{Types.ml}.
A program \texttt{t} is defined as follows:
\begin{grammar}
<t> \(\defeq\) <n> | <v> | <x> | `(' <t> `,' <t> `)'
\alt{} `fun' <x> `:' <type> `=>' <t> | <t> <t> | <op\textsubscript{1}> <t> | <t> <op\textsubscript{2}> <t> % chktex 38
\alt{} `powmod' `(' <t> `,' <t> `,' <t> `)'
\alt{} `rand' `(' <t> `)'
\alt{} `if' <t> `then' <t> `else' <t>
\alt{} `let' <x> `=' <t> `in' <t>
\alt{} `let' `rec' <x> <y> `: ' <type> `=' <t> `in' <t>
<op\textsubscript{1}> \(\defeq\) `not' | `fst' | `scn'
<op\textsubscript{2}> \(\defeq\) `+' | `-' | `*' | `/' | `\%' | `^' | `\&\&' | `||' | `=='
\alt{} `<' | `<=' | `>' | `>='
\end{grammar}
As reflected in the grammar, tuples have been implemented and the unary functions fst and scn return respectively the first element of the tuple and the second.
\end{subsection}
\end{section}
\begin{section}{Types for MiniFun}
A type \(\tau\) is defined as either {\it int}, {\it bool}, a tuple or a function.
\begin{equation*}
\tau \defeq {\it int\/}\ \vert\ {\it bool\/}\ \vert\ (\tau,\tau)\ \vert\ \tau \to \tau
\end{equation*}
The deduction rules regarding tuples are similar to those for functions:
\begin{center}
\inference[\texttt{Tuple}]
{\Gamma \vdash t_1 \triangleright \tau_1 & \Gamma \vdash t_2 \triangleright \tau_2} % chktex 1
{\Gamma \vdash (t_1, t_2) \triangleright \tau_1 * \tau_2} % chktex 1
\end{center}
\begin{center}
\inference[\texttt{Fst}]
{\Gamma \vdash t_1 \triangleright \tau_1 } % chktex 1
{\Gamma \vdash \texttt{fst} (t_1, t_2) \triangleright \tau_1} % chktex 1
\end{center}
\begin{center}
\inference[\texttt{Snd}]
{\Gamma \vdash t_2 \triangleright \tau_2 } % chktex 1
{\Gamma \vdash \texttt{snd} (t_1, t_2) \triangleright \tau_2} % chktex 1
\end{center}
The rules for function declaration with type annotations are thus:
\begin{center}
\inference[\texttt{Fun}]
{\Gamma[x \mapsto \tau] \vdash t \triangleright \tau'} % chktex 1
{\Gamma \vdash \texttt{fun} x \texttt{:} \tau \to \tau' \texttt{=>} t \triangleright \tau \to \tau'} % chktex 1
\end{center}
\begin{center}
\inference[\texttt{FunRec}]
{\Gamma[f \mapsto \tau \to \tau'; x \mapsto \tau] \vdash t_1 \triangleright \tau' & \Gamma[f \mapsto \tau \to \tau'] \vdash t_2 \triangleright \tau''} % chktex 1
{\Gamma \vdash \texttt{let rec} f x \texttt{:} \tau \to \tau' \texttt{ = } t_1 \texttt{ in } t_2 \triangleright \tau''} % chktex 1
\end{center}
The files \href{../lib/miniFun/TypeChecker.mli}{TypeChecker.mli} and \href{../lib/miniFun/TypeChecker.ml}{TypeChecker.ml} contain the implementation of the deduction rules; the type checker returns either the valid type of the expression or an error, instead of simply the required option type of the valid type.
\end{section}
\begin{section}{Parsing}
\begin{subsection}{MiniImp}
Operators listed in order of precedence from highest to lowest:
\begin{center}
\begin{tblr}{colspec={|c|c|}, rowspec={|Q|QQQQQQQ|}}
Operator & Associativity \\
while & left \\
\^{} & right \\
* / mod & left \\
not & {-} \\
+ {-} \(\vert\vert\) \&\& & left \\
if & left \\
{;} & left \\
\end{tblr}
\end{center}
The expressions \(c_1 \texttt{;} c_2\) and \(c_1 \texttt{;}\) are both recognized and give respectively \(\texttt{SEQUENCE(} c_1 \texttt{,} c_2 \texttt{)}\) % chktex 9
and \(c_1\), such that semicolons can be placed always at the end of a command.
Integers with a preceding minus sign can be interpreted as the opposite integer, with obviously lower precedence than the binary operator minus.
\end{subsection}
\begin{subsection}{MiniFun}
A decision was made to interpret \texttt{\textbackslash}, \texttt{lambda} and \texttt{fun} all as the start of the definition of a function just for ease of typing. They are associated to the same token \texttt{LAMBDA}.
Operators listed in order of precedence from highest to lowest:
\begin{center}
\begin{tblr}{colspec={|c|c|}, rowspec={|Q|QQQQQQQQQQQQ|}}
Operator & Associativity \\
function application & right \\
let & left \\
fun & left \\
fst snd & left \\
not rand & {-} \\
\^{} & right \\
* / mod & left \\
+ {--} & left \\
== {\(<\)} {\(\leq\)} {\(>\)} {\(\geq\)} & left \\
\(\vert\vert\) \&\& & left \\
powmod & left \\
\(\lambda\) if let letrec & left \\
\end{tblr}
\end{center}
Tuples require parentheses in their definition, but the tuple type does not since there is no ambiguity. The symbol \texttt{->} that defines the function type is right associative and has lowest precedence.
\end{subsection}
\begin{subsection}{Interpreters}
MiniImp and MiniFun each have an interpreter (\href{../bin/miniImpInterpreter.ml}{miniImpInterpreter.ml} and \href{../bin/miniFunInterpreter.ml}{miniFunInterpreter.ml}) that uses the package \href{https://opam.ocaml.org/packages/clap/}{Clap} to parse command line arguments and generate help pages.
The input to the program can be supplied either on stdin or as a command parameter after \texttt{-v}. The MiniFun interpreter also checks the types before computing the output of the program and returns an error in case the types mismatch.
\end{subsection}
\end{section}
\begin{section}{Control Flow Graph}
The control flow graph data structure is implemented in the analysis library in the files \href{../lib/analysis/Cfg.ml}{Cfg.ml} and \href{../lib/analysis/Cfg.mli}{Cfg.mli}.
Each node contains only an id to distinguish from others.
The control flow structure is composed of a flag to know if it is empty or contains nodes and the set of all contained nodes.
Since each node can only have at maximum 2 nodes as next nodes, the data structure contains a map from each node to a tuple of the two nodes or to a node.
The structure also contains the back edges of each node implemented as a map from each node to a list of nodes, the input value, the variables that are the input and output, the initial node and the terminal node.
Finally there is a map from each node to a list of generic elements that in our case are simple statements.
\begin{subsection}{MiniImp Simple Statement}
A MiniImp Simple Statement \(t\) is defined as follows:
\begin{grammar}
<t> \(\defeq\) skip | <x> `:=' <a> | <b> `{?}'
<b> \(\defeq\) <v> | <b> `\&\&' <b> | <b> `||' <b> | `not' <b>
\alt{} <a> `==' <a> | <a> `<' <a> | <a> `<=' <a> | <a> `>' <a> | <a> `>=' <a>
<a> \(\defeq\) <n> | <x> | <a> `+' <a> | <a> `-' <a> | <a> `*' <a> | <a> `/' <a>
\alt{} <a> `mod' <a> | <a> `^' <a> | `rand' <a>
\end{grammar}
The implemented CFG is neither minimal nor maximal, but can be either or both for some programs. In particular, each node has an associated list of statements, and a sequence of statements in the AST is put, if possible, in the same node.
\texttt{?} is only allowed as the last element of the list of statements associated with a node, and a node has an associated \texttt{?} if and only if it has two next nodes.
The for loop is translated as:
\begin{center}
\begin{tikzpicture}[
node/.style = {draw,rounded corners,blur shadow, fill=white,align=center},
box/.style={rectangle,draw,inner sep=10pt}
]
\node[node] at (0, 1.3) (b11) {$i_1^1$};
\node[node] at (0, 0) (b12) {$f_1^1$};
\node[box,fit = (b11) (b12)] (externalbox1) {};
\node[node, opacity=0] at (3, 1.3) (bb1) {$i_1^b$};
\node[node, opacity=0] at (3, 0) (bb2) {$f_1^b$};
\node[node] at (3, 0.65) (bball) {$i^b$};
\node[box,fit = (bb1) (bb2)] (externalboxb) {};
\node[node] at (6, 1.3) (b21) {$i_1^2$};
\node[node] at (6, 0) (b22) {$f_1^2$};
\node[box,fit = (b21) (b22)] (externalbox2) {};
\node[node] at (9, 1.3) (b31) {$i_1^3$};
\node[node] at (9, 0) (b32) {$f_1^3$};
\node[box,fit = (b31) (b32)] (externalbox3) {};
\node[fit = (externalbox1) (externalbox2) (externalbox3) (externalboxb)] (boxall) {};
\begin{scope}[rounded corners,-latex]
\path (externalbox1) edge (b11.north);
\path[dashed] (b11) edge (b12);
\path (b12) edge (externalbox1);
\path (externalboxb) edge (bball.north);
\path (bball) edge (externalboxb.south);
\path (externalbox2) edge (b21.north);
\path[dashed] (b21) edge (b22);
\path (b22) edge (externalbox2);
\path (externalbox3) edge (b31.north);
\path[dashed] (b31) edge (b32);
\path (b32) edge (externalbox3);
\end{scope}
\node[above] at (boxall.north) {\texttt{for (}\(c_1\)\texttt{,} \(b\)\texttt{,} \(c_2\)\texttt{) do} \(c_3\)};
\node[left] at (externalbox1.west) {\(c_1\):};
\node[left] at (externalboxb.west) {\(b\):};
\node[left] at (externalbox2.west) {\(c_2\):};
\node[left] at (externalbox3.west) {\(c_3\):};
\node[node] at (4.5, -2.7) (b11) {$i_1^1$};
\node[node] at (4.5, -4) (b12) {$f_1^1$};
\node[node] at (4.5, -5.3) (guard) {$i^b$};
\node[node] at (4.5, -6.6) (b31) {$i_1^3$};
\node[node] at (4.5, -7.9) (b32) {$f_1^3$};
\node[node] at (4.5, -9.2) (b21) {$i_1^2$};
\node[node] at (4.5, -10.5) (b22) {$f_1^2$};
\node[node] at (4.5, -11.8) (exitnode) {\texttt{skip}};
\node[box, fit = (b11) (b12) (guard) (b31) (b32) (b21) (b22) (exitnode),
inner sep=15pt] (externalboxall) {};
\begin{scope}[rounded corners,-latex]
\path[dashed] (b11) edge (b12);
\path[dashed] (b21) edge (b22);
\path[dashed] (b31) edge (b32);
\path (b12) edge (guard);
\path (guard) edge (b31);
\path (b32) edge (b21);
\path (b22.135) edge[bend left=20] (guard.220);
\path (guard.-40) edge[bend left=15] (exitnode.35);
\path (externalboxall.north) edge (b11.north);
\path (exitnode.south) edge (externalboxall.south);
\end{scope}
\node[above] at (externalboxall.north) {becomes:};
\end{tikzpicture}
\end{center}
We highlight the fact that the operation powermod is absent in the grammar of simple statements. In fact all powermod are replaced in the AST before translating into CFG with the function \texttt{rewrite_instructions} in \href{../lib/miniImp/replacePowerMod.ml}{replacePowerMod.ml} and \href{../lib/miniImp/replacePowerMod.mli}{replacePowerMod.mli}.
\texttt{powmod(}\(a_1\)\texttt{, }\(a_2\)\texttt{, }\(a_3\)\texttt{)} % chktex 9 chktex 36
is translated into:
\begin{lstlisting}
pow := £\(a_1\)£;
exp := £\(a_2\)£;
mod := £\(a_3\)£;
res := 1;
if exp < 0 then
exp := 0 - exp;
else
skip;
while exp > 0 do (
if 1 == exp % 2 then
res := (res * pow) % mod;
else
skip;
pow := (pow * pow) % mod;
exp := exp / 2;
)
\end{lstlisting}
The variables \texttt{pow}, \texttt{exp}, \texttt{mod} and \texttt{res} are all fresh and the value of \texttt{res} is then substituted in place of the \texttt{powmod} expression. This might need some more scope than only the expression since \texttt{powmod} may be included in an \texttt{if} guard, thus it is placed before the \texttt{if}; in case it is in the guard of a \texttt{while} or a \texttt{for} loop it is also updated at the end of the body.
The reason for substituting \texttt{powmod} in the AST is that we would need to add nodes to form the \texttt{if} and \texttt{while} and it would prove more difficult.
\end{subsection}
\end{section}
\begin{section}{Intermediate Code Generation}
\begin{subsection}{MiniRISC CFG}
In the files \href{../lib/miniImp/CfgRISC.ml}{CfgRISC.ml} and \href{../lib/miniImp/CfgRISC.mli}{CfgRISC.mli} the CFG generated from the AST gets translated into intermediate code with the following MiniRISC simple statements:
\begin{grammar}\label{grammar:MiniRISC}
<t> \(\defeq\) Nop
\alt{} BRegOp <brop> <r> <r> \(\Rightarrow\) <r>
\alt{} BImmOp <biop> <r> <n> \(\Rightarrow\) <r>
\alt{} URegOp <urop> <r> \(\Rightarrow\) <r>
\alt{} Load <r> \(\Rightarrow\) <r>
\alt{} LoadI <n> \(\Rightarrow\) <r>
\alt{} Store <r> \(\Rightarrow\) <r>
<brop> \(\defeq\) Add | Sub | Mult | Div | Mod | Pow | And | Or
\alt{} Eq | Less | LessEq | More | MoreEq
<biop> \(\defeq\) AddI | SubI | MultI | DivI | ModI | PowI | AndI | OrI
\alt{} EqI | LessI | LessEqI | MoreI | MoreEqI
<urop> \(\defeq\) Not | Copy | Rand
\end{grammar}
Since we strive towards shorter code and fewer instructions, we would prefer to use the \texttt{biop} version of each operation whenever possible. So for some operations that are commutative if the first term is the immediate value we swap the terms and use the \texttt{biop} variant instead of loading the value into a register and using the register for the calculation. Also some operations like \texttt{>} and \texttt{<} are opposite, so to invert the order we need to use the other \texttt{biop} version.
The input variable and the output variable are also mapped to \texttt{in} and \texttt{out} registers, while all other variables are given fresh registers.
\end{subsection}
\begin{subsection}{MiniRISC}
The MiniRISC CFG is finally translated into MiniRISC intermediate code by the function \texttt{convert} in the files \href{../lib/miniImp/RISC.ml}{RISC.ml} and \href{../lib/miniImp/RISC.mli}{RISC.mli}.
The grammar of MiniRISC is analogous to the one for \hyperref[grammar:MiniRISC]{MiniRISC Simple Statements}:
\begin{grammar}
<t> \(\defeq\) Nop
\alt{} BRegOp <brop> <r> <r> \(\Rightarrow\) <r>
\alt{} BImmOp <biop> <r> <n> \(\Rightarrow\) <r>
\alt{} URegOp <urop> <r> \(\Rightarrow\) <r>
\alt{} Load <r> \(\Rightarrow\) <r>
\alt{} LoadI <n> \(\Rightarrow\) <r>
\alt{} Store <r> \(\Rightarrow\) <r>
\alt{} Jump <l>
\alt{} CJump <r> <l> <l>
\alt{} Label <l>
<brop> \(\defeq\) Add | Sub | Mult | Div | Mod | Pow | And | Or
\alt{} Eq | Less | LessEq | More | MoreEq
<biop> \(\defeq\) AddI | SubI | MultI | DivI | ModI | PowI | AndI | OrI
\alt{} EqI | LessI | LessEqI | MoreI | MoreEqI
<urop> \(\defeq\) Not | Copy | Rand
\end{grammar}
where \texttt{l} is a string that uniquely identifies a label.
\end{subsection}
\begin{subsection}{RISC Semantics}
A \texttt{reduce} function that evaluates MiniRISC code is also implemented, in the files \href{../lib/miniImp/RISCSemantics.ml}{RISCSemantics.ml} and \href{../lib/miniImp/RISCSemantics.mli}{RISCSemantics.mli}.
The labels are used as is and not replaced by offsets, so the code is translated into a map from labels to code blocks for ease of computation.
\end{subsection}
\end{section}
\begin{section}{Dataflow Analysis}
A refined CFG structure used for analysis is defined in \href{../lib/analysis/Dataflow.ml}{Dataflow.ml} and \href{../lib/analysis/Dataflow.mli}{Dataflow.mli}. The CFG is supplemented with a map from each node to the support structure that stores the list of defined variables or live variables. Since the CFG is not minimal, there is also a list for each simple statement. A fixed point function then applies the input function until the map does not change. Simple structural equality is not appropriate since order in the lists should not matter; an internal function for equality is used.
\begin{subsection}{Defined Variables}
In the files \href{../lib/miniImp/definedVariables.ml}{definedVariables.ml} and \href{../lib/miniImp/definedVariables.mli}{definedVariables.mli} three functions are defined: \texttt{compute_defined_variables}, \texttt{compute_cfg} and \texttt{check_undefined_variables}.
\texttt{compute_defined_variables} creates the appropriate structure for the analysis and runs it. It returns the whole analysis structure.
\texttt{compute_cfg} returns the CFG from the analysis data structure; in the case of defined variables analysis the CFG returned is the same as the one in input of \texttt{compute_defined_variables}.
\texttt{check_undefined_variables} returns all variables that might be undefined at time of use.
Since the greatest fixed point is computed, first all variables are retrieved from all code, then assigned to each input and output list of variables for each line of code.
Since it is an approximation some behaviour might not be intuitive. For example:
\begin{lstlisting}
for (x := 0, x < 10, x := x + 1) do (
y := rand(x);
);
output := y;
\end{lstlisting}
will return the register associated with \texttt{y} as undefined since the guard of the for cycle might never be true.
\end{subsection}
\begin{subsection}{Live Variables}
In the files \href{../lib/miniImp/liveVariables.ml}{liveVariables.ml} and \href{../lib/miniImp/liveVariables.mli}{liveVariables.mli} three functions are defined: \texttt{compute_live_variables}, \texttt{compute_cfg} and \texttt{optimize_cfg}.
\texttt{compute_live_variables} creates the appropriate structure for the analysis and runs it. It returns the whole analysis structure.
\texttt{compute_cfg} returns the CFG from the analysis data structure.
\texttt{optimize_cfg} applies liveness analysis to reduce the number of registers used; returns the analysis structure (not the RISC CFG).
\end{subsection}
\end{section}
\begin{section}{Target Code Generation}
In the files \href{../lib/miniImp/reduceRegisters.ml}{reduceRegisters.ml} and \href{../lib/miniImp/reduceRegisters.mli}{reduceRegisters.mli} the function \texttt{reduceregisters} reduces the number of used registers by counting the syntactic occurrence of each variable and partitioning the set keeping the most used as registers. All registers are either renamed or put into memory. It is allowed for the input or output registers to be put in memory, in the latter case some code is added at the end of the program to retrieve the value and put it into a register (register \texttt{2}).
\begin{subsection}{MiniImp to MiniRISC compiler}
The file \href{../bin/miniImpInterpreterReg.ml}{miniImpInterpreterReg.ml} compiles from MiniImp to MiniRISC or executes the MiniRISC code. It uses the package \href{https://opam.ocaml.org/packages/clap/}{Clap} to parse command line arguments and generate help pages.
The input to the program can be supplied both in stdin or as a command parameter after \texttt{-v}. The flags for disabling the check for undefined variables or liveness analysis optimization are \texttt{-u} and \texttt{-l} respectively.
\end{subsection}
\end{section}
\begin{section}{Running the code}
The project uses the following packages: \href{https://dune.build/}{Dune}, \href{https://gallium.inria.fr/~fpottier/menhir/}{Menhir} and \href{https://github.com/rbardou/clap}{Clap}. They can be installed via \href{https://opam.ocaml.org/}{Opam} with the command \texttt{opam install dune menhir clap}.
To compile the project simply run \texttt{dune build}. To run the tests, run \texttt{dune runtest}.
In order to execute one of the interpreters run \texttt{dune exec <interpreter> {-}{-} <flags and options>}. To see a list of all options run \texttt{dune exec <interpreter> {-}{-} -h}. A binary version of the executables can also be found in \href{./_build/default/bin/}{./_build/default/bin/}.
\end{section}
%%% Local Variables:
%%% TeX-command-extra-options: "-shell-escape"
%%% TeX-master: "document.tex"
%%% End: